mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
28 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
13563e2598 | ||
![]() |
9571f85d53 | ||
![]() |
0e69756634 | ||
![]() |
9653bca1e2 | ||
![]() |
1c0719e8a4 | ||
![]() |
8b4b4dc613 | ||
![]() |
e76feaced8 | ||
![]() |
5e088d00d0 | ||
![]() |
972d4d70db | ||
![]() |
e96a6bd699 | ||
![]() |
fb9163185a | ||
![]() |
6e8b8ed34f | ||
![]() |
c0aadbcc99 | ||
![]() |
3da149418a | ||
![]() |
720610cce5 | ||
![]() |
cedcf4d070 | ||
![]() |
81b50c3c71 | ||
![]() |
0e1e88f53e | ||
![]() |
45c77a5c81 | ||
![]() |
dbce7e0721 | ||
![]() |
6d66051de6 | ||
![]() |
b93be8816a | ||
![]() |
19b0ac6d5c | ||
![]() |
3da2b958cf | ||
![]() |
dc2f8d81d3 | ||
![]() |
fc97ef174a | ||
![]() |
13523a12f9 | ||
![]() |
1b76cee239 |
29
Makefile.am
29
Makefile.am
@@ -21,15 +21,6 @@ cpuminer_SOURCES = \
|
||||
api.c \
|
||||
sysinfos.c \
|
||||
algo-gate-api.c\
|
||||
crypto/oaes_lib.c \
|
||||
crypto/c_keccak.c \
|
||||
crypto/c_groestl.c \
|
||||
crypto/c_blake256.c \
|
||||
crypto/c_jh.c \
|
||||
crypto/c_skein.c \
|
||||
crypto/hash.c \
|
||||
crypto/aesb.c \
|
||||
crypto/magimath.cpp \
|
||||
algo/argon2/argon2a/argon2a.c \
|
||||
algo/argon2/argon2a/ar2/argon2.c \
|
||||
algo/argon2/argon2a/ar2/opt.c \
|
||||
@@ -76,10 +67,6 @@ cpuminer_SOURCES = \
|
||||
algo/bmw/bmw512-gate.c \
|
||||
algo/bmw/bmw512.c \
|
||||
algo/bmw/bmw512-4way.c \
|
||||
algo/cryptonight/cryptolight.c \
|
||||
algo/cryptonight/cryptonight-common.c\
|
||||
algo/cryptonight/cryptonight-aesni.c\
|
||||
algo/cryptonight/cryptonight.c\
|
||||
algo/cubehash/cubehash_sse2.c\
|
||||
algo/cubehash/cube-hash-2way.c \
|
||||
algo/echo/sph_echo.c \
|
||||
@@ -102,9 +89,6 @@ cpuminer_SOURCES = \
|
||||
algo/hamsi/hamsi-hash-4way.c \
|
||||
algo/haval/haval.c \
|
||||
algo/haval/haval-hash-4way.c \
|
||||
algo/heavy/sph_hefty1.c \
|
||||
algo/heavy/heavy.c \
|
||||
algo/heavy/bastion.c \
|
||||
algo/hodl/aes.c \
|
||||
algo/hodl/hodl-gate.c \
|
||||
algo/hodl/hodl-wolf.c \
|
||||
@@ -123,8 +107,6 @@ cpuminer_SOURCES = \
|
||||
algo/keccak/sha3d-4way.c \
|
||||
algo/keccak/sha3d.c \
|
||||
algo/lanehash/lane.c \
|
||||
algo/luffa/sph_luffa.c \
|
||||
algo/luffa/luffa.c \
|
||||
algo/luffa/luffa_for_sse2.c \
|
||||
algo/luffa/luffa-hash-2way.c \
|
||||
algo/lyra2/lyra2.c \
|
||||
@@ -146,14 +128,14 @@ cpuminer_SOURCES = \
|
||||
algo/lyra2/allium.c \
|
||||
algo/lyra2/phi2-4way.c \
|
||||
algo/lyra2/phi2.c \
|
||||
algo/m7m.c \
|
||||
algo//m7m/m7m.c \
|
||||
algo/m7m/magimath.cpp \
|
||||
algo/nist5/nist5-gate.c \
|
||||
algo/nist5/nist5-4way.c \
|
||||
algo/nist5/nist5.c \
|
||||
algo/nist5/zr5.c \
|
||||
algo/panama/panama-hash-4way.c \
|
||||
algo/panama/sph_panama.c \
|
||||
algo/radiogatun/sph_radiogatun.c \
|
||||
algo/quark/quark-gate.c \
|
||||
algo/quark/quark.c \
|
||||
algo/quark/quark-4way.c \
|
||||
@@ -176,12 +158,12 @@ cpuminer_SOURCES = \
|
||||
algo/ripemd/lbry-4way.c \
|
||||
algo/scrypt/scrypt.c \
|
||||
algo/scrypt/neoscrypt.c \
|
||||
algo/scrypt/pluck.c \
|
||||
algo/sha/sph_sha2.c \
|
||||
algo/sha/sph_sha2big.c \
|
||||
algo/sha/sha256-hash-4way.c \
|
||||
algo/sha/sha512-hash-4way.c \
|
||||
algo/sha/hmac-sha256-hash.c \
|
||||
algo/sha/hmac-sha256-hash-4way.c \
|
||||
algo/sha/sha2.c \
|
||||
algo/sha/sha256t-gate.c \
|
||||
algo/sha/sha256t-4way.c \
|
||||
@@ -195,7 +177,6 @@ cpuminer_SOURCES = \
|
||||
algo/shavite/shavite-hash-2way.c \
|
||||
algo/shavite/shavite-hash-4way.c \
|
||||
algo/shavite/shavite.c \
|
||||
algo/simd/sph_simd.c \
|
||||
algo/simd/nist.c \
|
||||
algo/simd/vector.c \
|
||||
algo/simd/simd-hash-2way.c \
|
||||
@@ -233,7 +214,6 @@ cpuminer_SOURCES = \
|
||||
algo/x11/timetravel10-gate.c \
|
||||
algo/x11/timetravel10.c \
|
||||
algo/x11/timetravel10-4way.c \
|
||||
algo/x11/fresh.c \
|
||||
algo/x11/x11evo.c \
|
||||
algo/x11/x11evo-4way.c \
|
||||
algo/x11/x11evo-gate.c \
|
||||
@@ -252,7 +232,6 @@ cpuminer_SOURCES = \
|
||||
algo/x13/skunk-gate.c \
|
||||
algo/x13/skunk-4way.c \
|
||||
algo/x13/skunk.c \
|
||||
algo/x13/drop.c \
|
||||
algo/x13/x13bcd-4way.c \
|
||||
algo/x13/x13bcd.c \
|
||||
algo/x14/x14-gate.c \
|
||||
@@ -278,6 +257,7 @@ cpuminer_SOURCES = \
|
||||
algo/x16/hex.c \
|
||||
algo/x16/x21s-4way.c \
|
||||
algo/x16/x21s.c \
|
||||
algo/x16/minotaur.c \
|
||||
algo/x17/x17-gate.c \
|
||||
algo/x17/x17.c \
|
||||
algo/x17/x17-4way.c \
|
||||
@@ -287,7 +267,6 @@ cpuminer_SOURCES = \
|
||||
algo/x17/sonoa-gate.c \
|
||||
algo/x17/sonoa-4way.c \
|
||||
algo/x17/sonoa.c \
|
||||
algo/x20/x20r.c \
|
||||
algo/x22/x22i-4way.c \
|
||||
algo/x22/x22i.c \
|
||||
algo/x22/x22i-gate.c \
|
||||
|
60
README.md
60
README.md
@@ -12,10 +12,24 @@ a false positive, they are flagged simply because they are cryptocurrency
|
||||
miners. The source code is open for anyone to inspect. If you don't trust
|
||||
the software, don't use it.
|
||||
|
||||
|
||||
New thread:
|
||||
|
||||
https://bitcointalk.org/index.php?topic=5226770.msg53865575#msg53865575
|
||||
|
||||
Old thread:
|
||||
|
||||
https://bitcointalk.org/index.php?topic=1326803.0
|
||||
|
||||
mailto://jayddee246@gmail.com
|
||||
|
||||
This note is to confirm that bitcointalk users JayDDee and joblo are the
|
||||
same person.
|
||||
|
||||
I created a new BCT user JayDDee to match my github user id.
|
||||
The old thread has been locked but still contains useful information for
|
||||
reading.
|
||||
|
||||
See file RELEASE_NOTES for change log and INSTALL_LINUX or INSTALL_WINDOWS
|
||||
for compile instructions.
|
||||
|
||||
@@ -23,25 +37,25 @@ Requirements
|
||||
------------
|
||||
|
||||
1. A x86_64 architecture CPU with a minimum of SSE2 support. This includes
|
||||
Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
|
||||
optimizations a CPU with AES_NI is required. This includes Intel Westmere
|
||||
and newer and AMD equivalents. Further optimizations are available on some
|
||||
algoritms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
|
||||
Intel Core2 and newer and AMD equivalents. Further optimizations are available
|
||||
on some algoritms for CPUs with AES, AVX, AVX2, SHA, AVX512 and VAES.
|
||||
|
||||
Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
|
||||
performance.
|
||||
|
||||
ARM CPUs are not supported.
|
||||
ARM and Aarch64 CPUs are not supported.
|
||||
|
||||
2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
|
||||
Centos, are known to work and have all dependencies in their repositories.
|
||||
Others may work but may require more effort. Older versions such as Centos 6
|
||||
don't work due to missing features.
|
||||
2. 64 bit Linux or Windows OS. Ubuntu and Fedora based distributions,
|
||||
including Mint and Centos, are known to work and have all dependencies
|
||||
in their repositories. Others may work but may require more effort. Older
|
||||
versions such as Centos 6 don't work due to missing features.
|
||||
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
|
||||
|
||||
MacOS, OSx and Android are not supported.
|
||||
|
||||
3. Stratum pool. Some algos may work wallet mining using getwork or GBT. YMMV.
|
||||
3. Stratum pool supporting stratum+tcp:// or stratum+ssl:// protocols or
|
||||
RPC getwork using http:// or https://.
|
||||
GBT is YMMV.
|
||||
|
||||
Supported Algorithms
|
||||
--------------------
|
||||
@@ -53,7 +67,6 @@ Supported Algorithms
|
||||
argon2d500 argon2d-dyn, Dynamic (DYN)
|
||||
argon2d4096 argon2d-uis, Unitus, (UIS)
|
||||
axiom Shabal-256 MemoHash
|
||||
bastion
|
||||
blake Blake-256 (SFR)
|
||||
blake2b Blake2b 256
|
||||
blake2s Blake-2 S
|
||||
@@ -64,10 +77,7 @@ Supported Algorithms
|
||||
decred
|
||||
deep Deepcoin (DCN)
|
||||
dmd-gr Diamond-Groestl
|
||||
drop Dropcoin
|
||||
fresh Fresh
|
||||
groestl Groestl coin
|
||||
heavy Heavy
|
||||
hex x16r-hex
|
||||
hmq1725 Espers
|
||||
hodl Hodlcoin
|
||||
@@ -83,6 +93,7 @@ Supported Algorithms
|
||||
lyra2z
|
||||
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
|
||||
m7m Magi (XMG)
|
||||
minotaur Ringcoin (RNG)
|
||||
myr-gr Myriad-Groestl
|
||||
neoscrypt NeoScrypt(128, 2, 1)
|
||||
nist5 Nist5
|
||||
@@ -142,6 +153,27 @@ Supported Algorithms
|
||||
yespower-b2b generic yespower + blake2b
|
||||
zr5 Ziftr
|
||||
|
||||
Many variations of scrypt based algos can be mine by specifying their
|
||||
parameters:
|
||||
|
||||
scryptn2: --algo scrypt --param-n 1048576
|
||||
|
||||
cpupower: --algo yespower --param-key "CPUpower: The number of CPU working or available for proof-of-work mining"
|
||||
|
||||
power2b: --algo yespower-b2b --param-n 2048 --param-r 32 --param-key "Now I am become Death, the destroyer of worlds"
|
||||
|
||||
sugarchain: --algo yespower --param-n 2048 -param-r 32 --param-key "Satoshi Nakamoto 31/Oct/2008 Proof-of-work is essentially one-CPU-one-vote"
|
||||
|
||||
yespoweriots: --algo yespower --param-n 2048 --param-key "Iots is committed to the development of IOT"
|
||||
|
||||
yespowerlitb: --algo yespower --param-n 2048 --param-r 32 --param-key "LITBpower: The number of LITB working or available for proof-of-work mini"
|
||||
|
||||
yespoweric: --algo yespower --param-n 2048 --param-r 32 --param-key "IsotopeC"
|
||||
|
||||
yespowerurx: --algo yespower --param-n 2048 --param-r 32 --param-key "UraniumX"
|
||||
|
||||
yespowerltncg: --algo yespower --param-n 2048 --param-r 32 --param-key "LTNCGYES"
|
||||
|
||||
Errata
|
||||
------
|
||||
|
||||
|
37
README.txt
37
README.txt
@@ -1,8 +1,8 @@
|
||||
This file is included in the Windows binary package. Compile instructions
|
||||
for Linux and Windows can be found in RELEASE_NOTES.
|
||||
|
||||
cpuminer is a console program that is executed from a DOS command prompt.
|
||||
There is no GUI and no mouse support.
|
||||
cpuminer is a console program that is executed from a DOS or Powershell
|
||||
prompt. There is no GUI and no mouse support.
|
||||
|
||||
Miner programs are often flagged as malware by antivirus programs. This is
|
||||
a false positive, they are flagged simply because they are cryptocurrency
|
||||
@@ -15,8 +15,8 @@ the features listed at cpuminer startup to ensure you are mining at
|
||||
optimum speed using the best available features.
|
||||
|
||||
Architecture names and compile options used are only provided for Intel
|
||||
Core series. Budget CPUs like Pentium and Celeron are often missing the
|
||||
latest features.
|
||||
Core series. Budget CPUs like Pentium and Celeron are often missing some
|
||||
features.
|
||||
|
||||
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
||||
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
||||
@@ -31,14 +31,29 @@ https://en.wikipedia.org/wiki/List_of_Intel_CPU_microarchitectures
|
||||
https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures
|
||||
|
||||
|
||||
Exe name Compile flags Arch name
|
||||
Exe file name Compile flags Arch name
|
||||
|
||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
||||
cpuminer-aes-sse42.exe "-march=westmere" Westmere
|
||||
cpuminer-avx.exe "-march=corei7-avx" Sandybridge
|
||||
cpuminer-avx2.exe "-march=core-avx2 -maes" Haswell, Skylake, Coffeelake
|
||||
cpuminer-avx512.exe "-march=skylake-avx512" Skylake-X, Cascadelake-X
|
||||
cpuminer-zen "-march=znver1" AMD Ryzen, Threadripper
|
||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
||||
cpuminer-aes-sse42.exe "-march=westmere" Westmere
|
||||
cpuminer-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
|
||||
cpuminer-avx2.exe "-march=core-avx2 -maes" Haswell*
|
||||
cpuminer-avx512.exe "-march=skylake-avx512" Skylake-X, Cascadelake-X
|
||||
cpuminer-zen.exe "-march=znver1" AMD Ryzen, Threadripper
|
||||
cpuminer-avx512-sha-vaes.exe "-march=icelake-client" Icelake*
|
||||
|
||||
* Haswell includes Broadwell, Skylake, Kabylake, Coffeelake & Cometlake.
|
||||
Icelake is only available on some laptops. Mining with a laptop is not
|
||||
recommended. The icelake build is included in anticipation of Intel eventually
|
||||
releasing a desktop CPU with a microarchitecture newer than Skylake.
|
||||
|
||||
Notes about included DLL files:
|
||||
|
||||
Downloading DLL files from alternative sources presents an inherent
|
||||
security risk if their source is unknown. All DLL files included have
|
||||
been copied from the Ubuntu-20.04 instalation or compiled by me from
|
||||
source code obtained from the author's official repository. The exact
|
||||
procedure is documented in the build instructions for Windows:
|
||||
https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source
|
||||
|
||||
If you like this software feel free to donate:
|
||||
|
||||
|
245
RELEASE_NOTES
245
RELEASE_NOTES
@@ -65,6 +65,251 @@ If not what makes it happen or not happen?
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.14.1
|
||||
|
||||
GBT and getwork log changes:
|
||||
fixed missing TTF in New Block log,
|
||||
ntime no longer byte-swapped for display in New Work log,
|
||||
fixed zero effective hash rate in Periodic Report log,
|
||||
deleted "Current block is..." log.
|
||||
|
||||
Renamed stratum "New Job" log to "New Work" to be consistent with the solo
|
||||
version of the log. Added more data to both versions.
|
||||
|
||||
v3.14.0
|
||||
|
||||
Changes to solo mining:
|
||||
- segwit is supported by getblocktemplate,
|
||||
- longpolling is not working and is disabled,
|
||||
- Periodic Report log is output,
|
||||
- New Block log includes TTF estimates,
|
||||
- Stratum thread no longer created when using getwork or GBT.
|
||||
|
||||
Fixed BUG log mining sha256d.
|
||||
|
||||
v3.13.1.1
|
||||
|
||||
Fixed Windows crash mining minotaur algo.
|
||||
|
||||
Fixed GCC 10 compile again.
|
||||
Added -fno-common to testing to be consistent with GCC 10 default.
|
||||
|
||||
v3.13.1
|
||||
|
||||
Added minotaur algo for Ringcoin.
|
||||
|
||||
v3.13.0.1
|
||||
|
||||
Issue #262: Fixed xevan AVX2 invalid shares.
|
||||
|
||||
v3.13.0
|
||||
|
||||
Updated Windows binaries compiled with GCC 9. Included DLLs also updated.
|
||||
Icelake build (cpuminer-avx512-sha-vaes.exe) now included in Windows
|
||||
binaries package.
|
||||
|
||||
No source code changes.
|
||||
|
||||
v3.12.8.2
|
||||
|
||||
Fixed x12 AVX2 rejects.
|
||||
Fixed phi AVX2 crash.
|
||||
|
||||
v3.12.8.1
|
||||
|
||||
Issue #261: Fixed yescryptr8g invalid shares.
|
||||
|
||||
v3.12.8
|
||||
|
||||
Yespower sha256 prehash made thread safe.
|
||||
|
||||
Rewrote diff conversion functions from scratch to be simpler and use
|
||||
long double (float80) and int128 arithmetic for improved accuracy and
|
||||
precision.
|
||||
|
||||
Some code cleanup and assorted small changes.
|
||||
|
||||
v3.12.7
|
||||
|
||||
Issue #257: fixed a file descriptor leak which caused the CPU temperature
|
||||
and frequency query to report zeros after mining for a couple of hours.
|
||||
|
||||
Issue #253: stale share reduction for yescrypt, sonoa.
|
||||
|
||||
v3.12.6.1
|
||||
|
||||
Issue #252: Fixed SSL mining (stratum+tcps://)
|
||||
|
||||
Issue #254 Fixed benchmark.
|
||||
|
||||
Issue #253: Implemented stale share reduction for yespower, x25x, x22i, x21s,
|
||||
x16*, scryptn2, more to come.
|
||||
|
||||
v3.12.6
|
||||
|
||||
Issue #246: improved stale share detection for getwork.
|
||||
|
||||
Improved precision of target_to_diff conversion from 4 digits to 20+.
|
||||
|
||||
Display hash and target debug data for all rejected shares.
|
||||
|
||||
A graphical representation of CPU affinity is displayed when using --threads.
|
||||
|
||||
Added highest and lowest accepted share to summary log.
|
||||
|
||||
Other small changes to logs to improve consistency and clarity.
|
||||
|
||||
v3.12.5
|
||||
|
||||
Issues #246 & #251: fixed incorrect share diff for stratum and getwork,
|
||||
fixed incorrect target diff for getwork. Stats should now be correct for
|
||||
getwork as well as stratum.
|
||||
|
||||
Issue #252: Fixed stratum+tcps not using curl ssl.
|
||||
|
||||
Getwork: reduce stale blocks, faster response to new work.
|
||||
|
||||
Added ntime to new job/work logs.
|
||||
|
||||
README.md now lists the parameters for yespower variations that don't have
|
||||
a specific algo name.
|
||||
|
||||
v3.12.4.6
|
||||
|
||||
Issue #246: fixed getwork repeated new block logs with same height. New work
|
||||
for the same block is now reported as "New work" instead of "New block".
|
||||
Also added a check that work is new before generating "New work" log.
|
||||
|
||||
Added target diff to getwork new block log.
|
||||
|
||||
Changed share ratio in share result log to simple fraction, no longer %.
|
||||
|
||||
Added debug log to display mininginfo, use -D.
|
||||
|
||||
v3.12.4.5
|
||||
|
||||
Issue #246: better stale share detection for getwork, and enhanced logging
|
||||
of stale shares for stratum & getwork.
|
||||
|
||||
Issue #251: fixed incorrect share difficulty and share ratio in share
|
||||
result log.
|
||||
|
||||
Changed submit log to include share diff and block height.
|
||||
|
||||
Small cosmetic changes to logs.
|
||||
|
||||
v3.12.4.4
|
||||
|
||||
Issue #246: Fixed net hashrate in getwork block log,
|
||||
removed duplicate getwork block log,
|
||||
other small tweaks to stats logs for getwork.
|
||||
|
||||
Issue #248: Fixed chronic stale shares with scrypt:1048576 (scryptn2).
|
||||
|
||||
v3.12.4.3
|
||||
|
||||
Fixed segfault in new block log for getwork.
|
||||
|
||||
Disabled silent discarding of stale work after the submit is logged.
|
||||
|
||||
v3.12.4.2
|
||||
|
||||
Issue #245: fixed getwork stale shares, solo mining with getwork now works.
|
||||
|
||||
Issue #246: implemented block and summary logs for getwork.
|
||||
|
||||
v3.12.4.1
|
||||
|
||||
Issue #245: fix scantime when mining solo with getwork.
|
||||
|
||||
Added debug logs for creation of stratum and longpoll threads, use -D to
|
||||
enable.
|
||||
|
||||
v3.12.4
|
||||
|
||||
Issue #244: Change longpoll to ignore job id.
|
||||
|
||||
Lyra2rev2 AVX2 +3%, AVX512 +6%.
|
||||
|
||||
v3.12.3.1
|
||||
|
||||
Issue #241: Fixed regression that broke coinbase address in v3.11.7.
|
||||
|
||||
v3.12.3
|
||||
|
||||
Issue #238: Fixed skunk AVX2.
|
||||
|
||||
Issue #239: Faster AVX2 & AVX512 for skein +44%, skein2 +30%, plus marginal
|
||||
increases for skunk, x16r, x16rv2, x16rt, x16rt-veil, x16s, x21s.
|
||||
|
||||
Faster anime VAES +57%, AVX512 +21%, AVX2 +3%.
|
||||
|
||||
Redesigned code reponsible for #236.
|
||||
|
||||
v3.12.2
|
||||
|
||||
Fixed xevan, skein, skein2 AVX2, #238.
|
||||
|
||||
Reversed polarity of AVX2 vector bit test utilities, and all users, to be
|
||||
logically and semantically correct. Follow up to issue #236.
|
||||
|
||||
v3.12.1
|
||||
|
||||
Fixed anime AVX2 low difficulty shares, git issue #236.
|
||||
|
||||
Periodic summary now reports lost hash rate due to rejected and stale shares,
|
||||
displayed only when non-zero.
|
||||
|
||||
v3.12.0.1
|
||||
|
||||
Fixed hodl rejects, git issue #237.
|
||||
|
||||
Fixed debug code added in v3.12.0 to work with AVX2 to be enabled only
|
||||
after low difficulty share have been seen to avoid unnecessarily excessive
|
||||
log outout.
|
||||
|
||||
Added more digits of precision to diff in log output to help diagnose
|
||||
low difficulty shares.
|
||||
|
||||
v3.12.0
|
||||
|
||||
Faster phi2 AVX2 +62%, AVX512 +150% on Intel CPUs. AMD Ryzen AVX2 is
|
||||
YMMV due to its inferiour AVX2 implementation.
|
||||
|
||||
Fixed Hodl stats, rejects are still an issue since v3.9.5, git issue #237.
|
||||
|
||||
API can now be enabled with "-b port" or "--api-bind port".
|
||||
It will use the default address 127.0.0.1.
|
||||
|
||||
Editorial: Short form options should only be used on the command line to save
|
||||
typing. Configuration files and scripts should always use the long form
|
||||
"--api-bind addr:port" without relying on any defaults. This is a general
|
||||
recommendation that applies to all options for any application.
|
||||
|
||||
Removed obsolete cryptonight, all variants, and supporting code for more
|
||||
size reduction and faster compiling.
|
||||
|
||||
Tweaked the timing of the CPU temperature and frequency log (Linux only).
|
||||
|
||||
Added some debug code to collect more info aboout low difficulty rejects,
|
||||
git issue #236.
|
||||
|
||||
v3.11.9
|
||||
|
||||
Fixed x16r invalid shares when Luffa was first in hash order.
|
||||
|
||||
API is disabled by default.
|
||||
|
||||
New startup message for status of stratum connection, API & extranonce.
|
||||
|
||||
New log report for CPU temperature, frequency of fastest and slowest cores.
|
||||
|
||||
Compile time is a little shorter and binary file size a little smaller
|
||||
using conditional compilation..
|
||||
|
||||
Removed code for Bastion, Drop, Heavy, Luffa an Pluck algos and other unused
|
||||
code.
|
||||
|
||||
v3.11.8
|
||||
|
||||
Fixed network hashrate showing incorrect data, should be close now.
|
||||
|
193
aclocal.m4
vendored
193
aclocal.m4
vendored
@@ -1,6 +1,6 @@
|
||||
# generated automatically by aclocal 1.15.1 -*- Autoconf -*-
|
||||
# generated automatically by aclocal 1.16.1 -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to.
|
||||
If you have problems, you may need to regenerate the build system entirely.
|
||||
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
|
||||
|
||||
# Copyright (C) 2002-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2002-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -32,10 +32,10 @@ To do so, use the procedure documented by the package, typically 'autoreconf'.])
|
||||
# generated from the m4 files accompanying Automake X.Y.
|
||||
# (This private macro should not be called outside this file.)
|
||||
AC_DEFUN([AM_AUTOMAKE_VERSION],
|
||||
[am__api_version='1.15'
|
||||
[am__api_version='1.16'
|
||||
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
||||
dnl require some minimum version. Point them to the right macro.
|
||||
m4_if([$1], [1.15.1], [],
|
||||
m4_if([$1], [1.16.1], [],
|
||||
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
||||
])
|
||||
|
||||
@@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
|
||||
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
|
||||
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
|
||||
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
||||
[AM_AUTOMAKE_VERSION([1.15.1])dnl
|
||||
[AM_AUTOMAKE_VERSION([1.16.1])dnl
|
||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
|
||||
|
||||
# Figure out how to run the assembler. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -78,7 +78,7 @@ _AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl
|
||||
|
||||
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -130,7 +130,7 @@ am_aux_dir=`cd "$ac_aux_dir" && pwd`
|
||||
|
||||
# AM_CONDITIONAL -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1997-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -161,7 +161,7 @@ AC_CONFIG_COMMANDS_PRE(
|
||||
Usually this means the macro was only invoked conditionally.]])
|
||||
fi])])
|
||||
|
||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -352,13 +352,12 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
|
||||
|
||||
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
|
||||
# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||
# ------------------------------
|
||||
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
@@ -366,49 +365,41 @@ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
# Older Autoconf quotes --file arguments for eval, but not when files
|
||||
# are listed without --file. Let's play safe and only enable the eval
|
||||
# if we detect the quoting.
|
||||
case $CONFIG_FILES in
|
||||
*\'*) eval set x "$CONFIG_FILES" ;;
|
||||
*) set x $CONFIG_FILES ;;
|
||||
esac
|
||||
# TODO: see whether this extra hack can be removed once we start
|
||||
# requiring Autoconf 2.70 or later.
|
||||
AS_CASE([$CONFIG_FILES],
|
||||
[*\'*], [eval set x "$CONFIG_FILES"],
|
||||
[*], [set x $CONFIG_FILES])
|
||||
shift
|
||||
for mf
|
||||
# Used to flag and report bootstrapping failures.
|
||||
am_rc=0
|
||||
for am_mf
|
||||
do
|
||||
# Strip MF so we end up with the name of the file.
|
||||
mf=`echo "$mf" | sed -e 's/:.*$//'`
|
||||
# Check whether this is an Automake generated Makefile or not.
|
||||
# We used to match only the files named 'Makefile.in', but
|
||||
# some people rename them; so instead we look at the file content.
|
||||
# Grep'ing the first line is not enough: some people post-process
|
||||
# each Makefile.in and add a new line on top of each file to say so.
|
||||
# Grep'ing the whole file is not good either: AIX grep has a line
|
||||
am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'`
|
||||
# Check whether this is an Automake generated Makefile which includes
|
||||
# dependency-tracking related rules and includes.
|
||||
# Grep'ing the whole file directly is not great: AIX grep has a line
|
||||
# limit of 2048, but all sed's we know have understand at least 4000.
|
||||
if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
|
||||
dirpart=`AS_DIRNAME("$mf")`
|
||||
else
|
||||
continue
|
||||
fi
|
||||
# Extract the definition of DEPDIR, am__include, and am__quote
|
||||
# from the Makefile without running 'make'.
|
||||
DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
|
||||
test -z "$DEPDIR" && continue
|
||||
am__include=`sed -n 's/^am__include = //p' < "$mf"`
|
||||
test -z "$am__include" && continue
|
||||
am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
|
||||
# Find all dependency output files, they are included files with
|
||||
# $(DEPDIR) in their names. We invoke sed twice because it is the
|
||||
# simplest approach to changing $(DEPDIR) to its actual value in the
|
||||
# expansion.
|
||||
for file in `sed -n "
|
||||
s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
|
||||
sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do
|
||||
# Make sure the directory exists.
|
||||
test -f "$dirpart/$file" && continue
|
||||
fdir=`AS_DIRNAME(["$file"])`
|
||||
AS_MKDIR_P([$dirpart/$fdir])
|
||||
# echo "creating $dirpart/$file"
|
||||
echo '# dummy' > "$dirpart/$file"
|
||||
done
|
||||
sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
|
||||
|| continue
|
||||
am_dirpart=`AS_DIRNAME(["$am_mf"])`
|
||||
am_filepart=`AS_BASENAME(["$am_mf"])`
|
||||
AM_RUN_LOG([cd "$am_dirpart" \
|
||||
&& sed -e '/# am--include-marker/d' "$am_filepart" \
|
||||
| $MAKE -f - am--depfiles]) || am_rc=$?
|
||||
done
|
||||
if test $am_rc -ne 0; then
|
||||
AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments
|
||||
for automatic dependency tracking. Try re-running configure with the
|
||||
'--disable-dependency-tracking' option to at least be able to build
|
||||
the package (albeit without support for automatic dependency tracking).])
|
||||
fi
|
||||
AS_UNSET([am_dirpart])
|
||||
AS_UNSET([am_filepart])
|
||||
AS_UNSET([am_mf])
|
||||
AS_UNSET([am_rc])
|
||||
rm -f conftest-deps.mk
|
||||
}
|
||||
])# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||
|
||||
@@ -417,18 +408,17 @@ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
# -----------------------------
|
||||
# This macro should only be invoked once -- use via AC_REQUIRE.
|
||||
#
|
||||
# This code is only required when automatic dependency tracking
|
||||
# is enabled. FIXME. This creates each '.P' file that we will
|
||||
# need in order to bootstrap the dependency handling code.
|
||||
# This code is only required when automatic dependency tracking is enabled.
|
||||
# This creates each '.Po' and '.Plo' makefile fragment that we'll need in
|
||||
# order to bootstrap the dependency handling code.
|
||||
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
[AC_CONFIG_COMMANDS([depfiles],
|
||||
[test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
[AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
|
||||
])
|
||||
[AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])])
|
||||
|
||||
# Do all the work for Automake. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -515,8 +505,8 @@ AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
|
||||
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
|
||||
# For better backward compatibility. To be removed once Automake 1.9.x
|
||||
# dies out for good. For more background, see:
|
||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
|
||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
||||
# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
|
||||
# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
||||
AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
|
||||
# We need awk for the "check" target (and possibly the TAP driver). The
|
||||
# system "awk" is bad on some platforms.
|
||||
@@ -583,7 +573,7 @@ END
|
||||
Aborting the configuration process, to ensure you take notice of the issue.
|
||||
|
||||
You can download and install GNU coreutils to get an 'rm' implementation
|
||||
that behaves properly: <http://www.gnu.org/software/coreutils/>.
|
||||
that behaves properly: <https://www.gnu.org/software/coreutils/>.
|
||||
|
||||
If you want to complete the configuration process using your problematic
|
||||
'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
|
||||
@@ -625,7 +615,7 @@ for _am_header in $config_headers :; do
|
||||
done
|
||||
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
||||
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -646,7 +636,7 @@ if test x"${install_sh+set}" != xset; then
|
||||
fi
|
||||
AC_SUBST([install_sh])])
|
||||
|
||||
# Copyright (C) 2003-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2003-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -668,7 +658,7 @@ AC_SUBST([am__leading_dot])])
|
||||
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
|
||||
# From Jim Meyering
|
||||
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -703,7 +693,7 @@ AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
|
||||
|
||||
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -711,49 +701,42 @@ AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
|
||||
|
||||
# AM_MAKE_INCLUDE()
|
||||
# -----------------
|
||||
# Check to see how make treats includes.
|
||||
# Check whether make has an 'include' directive that can support all
|
||||
# the idioms we need for our automatic dependency tracking code.
|
||||
AC_DEFUN([AM_MAKE_INCLUDE],
|
||||
[am_make=${MAKE-make}
|
||||
cat > confinc << 'END'
|
||||
[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive])
|
||||
cat > confinc.mk << 'END'
|
||||
am__doit:
|
||||
@echo this is the am__doit target
|
||||
@echo this is the am__doit target >confinc.out
|
||||
.PHONY: am__doit
|
||||
END
|
||||
# If we don't find an include directive, just comment out the code.
|
||||
AC_MSG_CHECKING([for style of include used by $am_make])
|
||||
am__include="#"
|
||||
am__quote=
|
||||
_am_result=none
|
||||
# First try GNU make style include.
|
||||
echo "include confinc" > confmf
|
||||
# Ignore all kinds of additional output from 'make'.
|
||||
case `$am_make -s -f confmf 2> /dev/null` in #(
|
||||
*the\ am__doit\ target*)
|
||||
am__include=include
|
||||
am__quote=
|
||||
_am_result=GNU
|
||||
;;
|
||||
esac
|
||||
# Now try BSD make style include.
|
||||
if test "$am__include" = "#"; then
|
||||
echo '.include "confinc"' > confmf
|
||||
case `$am_make -s -f confmf 2> /dev/null` in #(
|
||||
*the\ am__doit\ target*)
|
||||
am__include=.include
|
||||
am__quote="\""
|
||||
_am_result=BSD
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
AC_SUBST([am__include])
|
||||
AC_SUBST([am__quote])
|
||||
AC_MSG_RESULT([$_am_result])
|
||||
rm -f confinc confmf
|
||||
])
|
||||
# BSD make does it like this.
|
||||
echo '.include "confinc.mk" # ignored' > confmf.BSD
|
||||
# Other make implementations (GNU, Solaris 10, AIX) do it like this.
|
||||
echo 'include confinc.mk # ignored' > confmf.GNU
|
||||
_am_result=no
|
||||
for s in GNU BSD; do
|
||||
AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out])
|
||||
AS_CASE([$?:`cat confinc.out 2>/dev/null`],
|
||||
['0:this is the am__doit target'],
|
||||
[AS_CASE([$s],
|
||||
[BSD], [am__include='.include' am__quote='"'],
|
||||
[am__include='include' am__quote=''])])
|
||||
if test "$am__include" != "#"; then
|
||||
_am_result="yes ($s style)"
|
||||
break
|
||||
fi
|
||||
done
|
||||
rm -f confinc.* confmf.*
|
||||
AC_MSG_RESULT([${_am_result}])
|
||||
AC_SUBST([am__include])])
|
||||
AC_SUBST([am__quote])])
|
||||
|
||||
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1997-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -792,7 +775,7 @@ fi
|
||||
|
||||
# Helper functions for option handling. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -821,7 +804,7 @@ AC_DEFUN([_AM_SET_OPTIONS],
|
||||
AC_DEFUN([_AM_IF_OPTION],
|
||||
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
|
||||
|
||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -868,7 +851,7 @@ AC_LANG_POP([C])])
|
||||
# For backward compatibility.
|
||||
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
|
||||
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -887,7 +870,7 @@ AC_DEFUN([AM_RUN_LOG],
|
||||
|
||||
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -968,7 +951,7 @@ AC_CONFIG_COMMANDS_PRE(
|
||||
rm -f conftest.file
|
||||
])
|
||||
|
||||
# Copyright (C) 2009-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2009-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1028,7 +1011,7 @@ AC_SUBST([AM_BACKSLASH])dnl
|
||||
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
|
||||
])
|
||||
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1056,7 +1039,7 @@ fi
|
||||
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
||||
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
||||
|
||||
# Copyright (C) 2006-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2006-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1075,7 +1058,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
|
||||
|
||||
# Check how to create a tarball. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2004-2017 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2004-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
|
151
algo-gate-api.c
151
algo-gate-api.c
@@ -90,34 +90,59 @@ void algo_not_implemented()
|
||||
}
|
||||
|
||||
// default null functions
|
||||
|
||||
// deprecated, use generic as default
|
||||
int null_scanhash()
|
||||
{
|
||||
applog(LOG_WARNING,"SWERR: undefined scanhash function in algo_gate");
|
||||
return 0;
|
||||
}
|
||||
|
||||
void null_hash()
|
||||
// Default generic scanhash can be used in many cases.
|
||||
int scanhash_generic( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 1;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
do
|
||||
{
|
||||
edata[19] = n;
|
||||
if ( likely( algo_gate.hash( hash, edata, thr_id ) ) )
|
||||
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int null_hash()
|
||||
{
|
||||
applog(LOG_WARNING,"SWERR: null_hash unsafe null function");
|
||||
};
|
||||
void null_hash_suw()
|
||||
{
|
||||
applog(LOG_WARNING,"SWERR: null_hash_suw unsafe null function");
|
||||
return 0;
|
||||
};
|
||||
|
||||
void init_algo_gate( algo_gate_t* gate )
|
||||
{
|
||||
gate->miner_thread_init = (void*)&return_true;
|
||||
gate->scanhash = (void*)&null_scanhash;
|
||||
gate->scanhash = (void*)&scanhash_generic;
|
||||
gate->hash = (void*)&null_hash;
|
||||
gate->hash_suw = (void*)&null_hash_suw;
|
||||
gate->get_new_work = (void*)&std_get_new_work;
|
||||
gate->get_nonceptr = (void*)&std_get_nonceptr;
|
||||
gate->work_decode = (void*)&std_le_work_decode;
|
||||
gate->decode_extra_data = (void*)&do_nothing;
|
||||
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
|
||||
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
|
||||
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
|
||||
gate->malloc_txs_request = (void*)&std_malloc_txs_request;
|
||||
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
|
||||
@@ -129,7 +154,6 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->resync_threads = (void*)&do_nothing;
|
||||
gate->do_this_thread = (void*)&return_true;
|
||||
gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call;
|
||||
gate->stratum_handle_response = (void*)&std_stratum_handle_response;
|
||||
gate->get_work_data_size = (void*)&std_get_work_data_size;
|
||||
gate->optimizations = EMPTY_SET;
|
||||
gate->ntime_index = STD_NTIME_INDEX;
|
||||
@@ -162,23 +186,16 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_ARGON2D500: register_argon2d_dyn_algo ( gate ); break;
|
||||
case ALGO_ARGON2D4096: register_argon2d4096_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
|
||||
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
case ALGO_BMW512: register_bmw512_algo ( gate ); break;
|
||||
case ALGO_C11: register_c11_algo ( gate ); break;
|
||||
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHTV7: register_cryptonightv7_algo ( gate ); break;
|
||||
case ALGO_DECRED: register_decred_algo ( gate ); break;
|
||||
case ALGO_DEEP: register_deep_algo ( gate ); break;
|
||||
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
|
||||
case ALGO_DROP: register_drop_algo ( gate ); break;
|
||||
case ALGO_FRESH: register_fresh_algo ( gate ); break;
|
||||
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
|
||||
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
||||
case ALGO_HEX: register_hex_algo ( gate ); break;
|
||||
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||
@@ -186,7 +203,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
||||
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
|
||||
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
||||
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
||||
case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break;
|
||||
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
|
||||
@@ -194,13 +210,13 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
||||
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
||||
case ALGO_MINOTAUR: register_minotaur_algo ( gate ); break;
|
||||
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
||||
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
|
||||
case ALGO_NIST5: register_nist5_algo ( gate ); break;
|
||||
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
|
||||
case ALGO_PHI1612: register_phi1612_algo ( gate ); break;
|
||||
case ALGO_PHI2: register_phi2_algo ( gate ); break;
|
||||
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
|
||||
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
|
||||
case ALGO_POWER2B: register_power2b_algo ( gate ); break;
|
||||
case ALGO_QUARK: register_quark_algo ( gate ); break;
|
||||
@@ -241,11 +257,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_X22I: register_x22i_algo ( gate ); break;
|
||||
case ALGO_X25X: register_x25x_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
/* case ALGO_YESCRYPT: register_yescrypt_05_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_05_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_05_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_05_algo ( gate ); break;
|
||||
*/
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8G: register_yescryptr8g_algo ( gate ); break;
|
||||
@@ -272,30 +283,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
// restore warnings
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
// override std defaults with jr2 defaults
|
||||
bool register_json_rpc2( algo_gate_t *gate )
|
||||
{
|
||||
applog(LOG_WARNING,"\nCryptonight algorithm and variants are no longer");
|
||||
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
|
||||
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
|
||||
|
||||
// gate->wait_for_diff = (void*)&do_nothing;
|
||||
gate->get_new_work = (void*)&jr2_get_new_work;
|
||||
gate->get_nonceptr = (void*)&jr2_get_nonceptr;
|
||||
gate->stratum_gen_work = (void*)&jr2_stratum_gen_work;
|
||||
gate->build_stratum_request = (void*)&jr2_build_stratum_request;
|
||||
gate->submit_getwork_result = (void*)&jr2_submit_getwork_result;
|
||||
gate->longpoll_rpc_call = (void*)&jr2_longpoll_rpc_call;
|
||||
gate->work_decode = (void*)&jr2_work_decode;
|
||||
gate->stratum_handle_response = (void*)&jr2_stratum_handle_response;
|
||||
gate->nonce_index = JR2_NONCE_INDEX;
|
||||
jsonrpc_2 = true; // still needed
|
||||
opt_extranonce = false;
|
||||
// have_gbt = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// run the alternate hash function for a specific algo
|
||||
void exec_hash_function( int algo, void *output, const void *pdata )
|
||||
{
|
||||
algo_gate_t gate;
|
||||
@@ -315,39 +302,37 @@ void exec_hash_function( int algo, void *output, const void *pdata )
|
||||
const char* const algo_alias_map[][2] =
|
||||
{
|
||||
// alias proper
|
||||
{ "argon2d-crds", "argon2d250" },
|
||||
{ "argon2d-dyn", "argon2d500" },
|
||||
{ "argon2d-uis", "argon2d4096" },
|
||||
{ "bcd", "x13bcd" },
|
||||
{ "bitcore", "timetravel10" },
|
||||
{ "bitzeny", "yescryptr8" },
|
||||
{ "blake256r8", "blakecoin" },
|
||||
{ "blake256r8vnl", "vanilla" },
|
||||
{ "blake256r14", "blake" },
|
||||
{ "blake256r14dcr", "decred" },
|
||||
{ "cryptonote", "cryptonight" },
|
||||
{ "cryptonight-light", "cryptolight" },
|
||||
{ "diamond", "dmd-gr" },
|
||||
{ "droplp", "drop" },
|
||||
{ "espers", "hmq1725" },
|
||||
{ "flax", "c11" },
|
||||
{ "hsr", "x13sm3" },
|
||||
{ "jackpot", "jha" },
|
||||
{ "jane", "scryptjane" },
|
||||
{ "lyra2", "lyra2re" },
|
||||
{ "lyra2v2", "lyra2rev2" },
|
||||
{ "lyra2v3", "lyra2rev3" },
|
||||
{ "myrgr", "myr-gr" },
|
||||
{ "myriad", "myr-gr" },
|
||||
{ "neo", "neoscrypt" },
|
||||
{ "phi", "phi1612" },
|
||||
{ "sib", "x11gost" },
|
||||
{ "timetravel8", "timetravel" },
|
||||
{ "veil", "x16rt-veil" },
|
||||
{ "x16r-hex", "hex" },
|
||||
{ "yenten", "yescryptr16" },
|
||||
{ "ziftr", "zr5" },
|
||||
{ NULL, NULL }
|
||||
{ "argon2d-crds", "argon2d250" },
|
||||
{ "argon2d-dyn", "argon2d500" },
|
||||
{ "argon2d-uis", "argon2d4096" },
|
||||
{ "bcd", "x13bcd" },
|
||||
{ "bitcore", "timetravel10" },
|
||||
{ "bitzeny", "yescryptr8" },
|
||||
{ "blake256r8", "blakecoin" },
|
||||
{ "blake256r8vnl", "vanilla" },
|
||||
{ "blake256r14", "blake" },
|
||||
{ "blake256r14dcr", "decred" },
|
||||
{ "diamond", "dmd-gr" },
|
||||
{ "espers", "hmq1725" },
|
||||
{ "flax", "c11" },
|
||||
{ "hsr", "x13sm3" },
|
||||
{ "jackpot", "jha" },
|
||||
{ "jane", "scryptjane" },
|
||||
{ "lyra2", "lyra2re" },
|
||||
{ "lyra2v2", "lyra2rev2" },
|
||||
{ "lyra2v3", "lyra2rev3" },
|
||||
{ "myrgr", "myr-gr" },
|
||||
{ "myriad", "myr-gr" },
|
||||
{ "neo", "neoscrypt" },
|
||||
{ "phi", "phi1612" },
|
||||
{ "scryptn2", "scrypt:1048576" },
|
||||
{ "sib", "x11gost" },
|
||||
{ "timetravel8", "timetravel" },
|
||||
{ "veil", "x16rt-veil" },
|
||||
{ "x16r-hex", "hex" },
|
||||
{ "yenten", "yescryptr16" },
|
||||
{ "ziftr", "zr5" },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
|
||||
// if arg is a valid alias for a known algo it is updated with the proper
|
||||
@@ -360,7 +345,7 @@ void get_algo_alias( char** algo_or_alias )
|
||||
if ( !strcasecmp( *algo_or_alias, algo_alias_map[i][ ALIAS ] ) )
|
||||
{
|
||||
// found valid alias, return proper name
|
||||
*algo_or_alias = (char* const)( algo_alias_map[i][ PROPER ] );
|
||||
*algo_or_alias = (char*)( algo_alias_map[i][ PROPER ] );
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@@ -75,7 +75,7 @@
|
||||
|
||||
// my hack at creating a set data type using bit masks. Set inclusion,
|
||||
// exclusion union and intersection operations are provided for convenience. In // some cases it may be desireable to use boolean algebra directly on the
|
||||
// data to perfomr set operations. Sets can be represented as single
|
||||
// data to perform set operations. Sets can be represented as single
|
||||
// elements, a bitwise OR of multiple elements, a bitwise OR of multiple
|
||||
// set variables or constants, or combinations of the above.
|
||||
// Examples:
|
||||
@@ -110,12 +110,13 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// mandatory functions, must be overwritten
|
||||
// Mandatory functions, one of these is mandatory. If the default scanhash
|
||||
// is used a custom hash function must be registered, with a custom scanhash
|
||||
// the hash function is not necessary.
|
||||
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
|
||||
|
||||
// optional unsafe, must be overwritten if algo uses function
|
||||
void ( *hash ) ( void*, const void*, uint32_t ) ;
|
||||
void ( *hash_suw ) ( void*, const void* );
|
||||
//int ( *hash ) ( void*, const void*, uint32_t ) ;
|
||||
int ( *hash ) ( void*, const void*, int );
|
||||
|
||||
//optional, safe to use default in most cases
|
||||
|
||||
@@ -123,15 +124,9 @@ void ( *hash_suw ) ( void*, const void* );
|
||||
// threads.
|
||||
bool ( *miner_thread_init ) ( int );
|
||||
|
||||
// Generate global blockheader from stratum data.
|
||||
void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* );
|
||||
|
||||
// Get thread local copy of blockheader with unique nonce.
|
||||
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t* );
|
||||
|
||||
// Return pointer to nonce in blockheader.
|
||||
uint32_t *( *get_nonceptr ) ( uint32_t* );
|
||||
|
||||
// Decode getwork blockheader
|
||||
bool ( *work_decode ) ( const json_t*, struct work* );
|
||||
|
||||
@@ -168,8 +163,9 @@ bool ( *do_this_thread ) ( int );
|
||||
// After do_this_thread
|
||||
void ( *resync_threads ) ( struct work* );
|
||||
|
||||
// No longer needed
|
||||
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
|
||||
bool ( *stratum_handle_response ) ( json_t* );
|
||||
|
||||
set_t optimizations;
|
||||
int ( *get_work_data_size ) ();
|
||||
int ntime_index;
|
||||
@@ -213,40 +209,32 @@ void four_way_not_tested();
|
||||
#define JR2_WORK_CMP_INDEX_2 43
|
||||
#define JR2_WORK_CMP_SIZE_2 33
|
||||
|
||||
// allways returns failure
|
||||
// deprecated, use generic instead
|
||||
int null_scanhash();
|
||||
|
||||
// Default generic, may be used in many cases.
|
||||
int scanhash_generic( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
// displays warning
|
||||
void null_hash ();
|
||||
void null_hash_suw();
|
||||
int null_hash ();
|
||||
|
||||
// optional safe targets, default listed first unless noted.
|
||||
|
||||
uint32_t *std_get_nonceptr( uint32_t *work_data );
|
||||
uint32_t *jr2_get_nonceptr( uint32_t *work_data );
|
||||
|
||||
void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
|
||||
uint32_t* end_nonce_ptr );
|
||||
void jr2_get_new_work( struct work *work, struct work *g_work, int thr_id,
|
||||
uint32_t* end_nonce_ptr );
|
||||
|
||||
void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
|
||||
void jr2_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
|
||||
|
||||
void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
|
||||
void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
|
||||
|
||||
bool std_le_work_decode( const json_t *val, struct work *work );
|
||||
bool std_be_work_decode( const json_t *val, struct work *work );
|
||||
bool jr2_work_decode( const json_t *val, struct work *work );
|
||||
|
||||
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
|
||||
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
|
||||
bool jr2_submit_getwork_result( CURL *curl, struct work *work );
|
||||
|
||||
void std_le_build_stratum_request( char *req, struct work *work );
|
||||
void std_be_build_stratum_request( char *req, struct work *work );
|
||||
void jr2_build_stratum_request ( char *req, struct work *work );
|
||||
|
||||
char* std_malloc_txs_request( struct work *work );
|
||||
|
||||
@@ -263,10 +251,6 @@ void std_build_block_header( struct work* g_work, uint32_t version,
|
||||
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
|
||||
|
||||
json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
|
||||
json_t* jr2_longpoll_rpc_call( CURL *curl, int *err );
|
||||
|
||||
bool std_stratum_handle_response( json_t *val );
|
||||
bool jr2_stratum_handle_response( json_t *val );
|
||||
|
||||
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id );
|
||||
@@ -285,11 +269,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate );
|
||||
// compiler warnings but that's just more work for devs adding new algos.
|
||||
bool register_algo( algo_gate_t *gate );
|
||||
|
||||
// Overrides a common set of functions used by RPC2 and other RPC2-specific
|
||||
// init. Called by algo's register function before initializing algo-specific
|
||||
// functions and data.
|
||||
bool register_json_rpc2( algo_gate_t *gate );
|
||||
|
||||
// use this to call the hash function of an algo directly, ie util.c test.
|
||||
void exec_hash_function( int algo, void *output, const void *pdata );
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#include "argon2d-gate.h"
|
||||
#include "simd-utils.h"
|
||||
#include "argon2d/argon2.h"
|
||||
|
||||
static const size_t INPUT_BYTES = 80; // Lenth of a block header in bytes. Input Length = Salt Length (salt = input)
|
||||
@@ -36,7 +37,7 @@ void argon2d_crds_hash( void *output, const void *input )
|
||||
int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -45,11 +46,11 @@ int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_crds_hash( hash, endiandata );
|
||||
be32enc(&edata[19], nonce);
|
||||
argon2d_crds_hash( hash, edata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
@@ -103,31 +104,32 @@ void argon2d_dyn_hash( void *output, const void *input )
|
||||
int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const int thr_id = mythr->id;
|
||||
const uint32_t first_nonce = (const uint32_t)pdata[19];
|
||||
const uint32_t last_nonce = (const uint32_t)max_nonce;
|
||||
uint32_t nonce = first_nonce;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
do
|
||||
{
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_dyn_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
edata[19] = nonce;
|
||||
argon2d_dyn_hash( hash, edata );
|
||||
if ( unlikely( valid_hash( (uint64_t*)hash, (uint64_t*)ptarget )
|
||||
&& !bench ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
pdata[19] = bswap_32( nonce );;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
} while ( likely( nonce < last_nonce && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -146,36 +148,34 @@ int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = (const uint32_t)max_nonce;
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t t_cost = 1; // 1 iteration
|
||||
uint32_t m_cost = 4096; // use 4MB
|
||||
uint32_t parallelism = 1; // 1 thread, 2 lanes
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], n );
|
||||
argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) endiandata, 80,
|
||||
(char*) endiandata, 80, (char*) vhash, 32, ARGON2_VERSION_13 );
|
||||
if ( vhash[7] < Htarg && fulltest( vhash, ptarget ) && !opt_benchmark )
|
||||
edata[19] = n;
|
||||
argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) edata, 80,
|
||||
(char*) edata, 80, (char*) vhash, 32, ARGON2_VERSION_13 );
|
||||
if ( unlikely( valid_hash( vhash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n;
|
||||
be32enc( &pdata[19], n );
|
||||
submit_solution( work, vhash, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( likely( n < last_nonce && !work_restart[thr_id].restart ) );
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -48,7 +48,7 @@ int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
@@ -107,7 +107,7 @@ int scanhash_blake_8way( struct work *work, uint32_t max_nonce,
|
||||
if ( (hash+i)[7] <= HTarget && fulltest( hash+i, ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
n += 8;
|
||||
|
||||
|
@@ -45,7 +45,7 @@ int scanhash_blake2b_8way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
@@ -100,7 +100,7 @@ int scanhash_blake2b_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -33,6 +33,8 @@
|
||||
|
||||
#include "blake2b-hash-4way.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
static const uint8_t sigma[12][16] =
|
||||
{
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
@@ -203,9 +205,9 @@ void blake2b_8way_final( blake2b_8way_ctx *ctx, void *out )
|
||||
casti_m512i( out, 3 ) = ctx->h[3];
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // AVX512
|
||||
|
||||
#if defined(__AVX2__)
|
||||
// AVX2
|
||||
|
||||
// G Mixing function.
|
||||
|
||||
@@ -369,4 +371,4 @@ void blake2b_4way_final( blake2b_4way_ctx *ctx, void *out )
|
||||
casti_m256i( out, 3 ) = ctx->h[3];
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // AVX2
|
||||
|
@@ -4,6 +4,9 @@
|
||||
*/
|
||||
|
||||
#include "blake2b-gate.h"
|
||||
|
||||
#if !defined(BLAKE2B_8WAY) && !defined(BLAKE2B_4WAY)
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "algo/blake/sph_blake2b.h"
|
||||
@@ -58,3 +61,4 @@ int scanhash_blake2b( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -49,7 +49,7 @@ int scanhash_blake2s_16way( struct work *work, uint32_t max_nonce,
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 16;
|
||||
@@ -104,7 +104,7 @@ int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce,
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
@@ -157,7 +157,7 @@ int scanhash_blake2s_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -1,5 +1,7 @@
|
||||
#include "blake2s-gate.h"
|
||||
|
||||
#if !defined(BLAKE2S_16WAY) && !defined(BLAKE2S_8WAY) && !defined(BLAKE2S)
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
@@ -70,3 +72,4 @@ int scanhash_blake2s( struct work *work,
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
@@ -49,7 +49,7 @@ int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
@@ -108,7 +108,7 @@ int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "blakecoin-gate.h"
|
||||
|
||||
#if !defined(BLAKECOIN_8WAY) && !defined(BLAKECOIN_4WAY)
|
||||
|
||||
#define BLAKE32_ROUNDS 8
|
||||
#include "sph_blake.h"
|
||||
|
||||
@@ -93,3 +96,4 @@ int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -62,7 +62,7 @@ int scanhash_decred_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[DECRED_NONCE_INDEX] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
@@ -153,7 +153,7 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&decred_hash;
|
||||
#endif
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
// gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->decode_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "decred-gate.h"
|
||||
|
||||
#if !defined(DECRED_8WAY) && !defined(DECRED_4WAY)
|
||||
|
||||
#include "sph_blake.h"
|
||||
|
||||
#include <string.h>
|
||||
@@ -275,3 +278,5 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
@@ -105,7 +105,7 @@ int scanhash_pentablake_4way( struct work *work,
|
||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "pentablake-gate.h"
|
||||
|
||||
#if !defined(PENTABLAKE_8WAY) && !defined(PENTABLAKE_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -111,3 +114,4 @@ int scanhash_pentablake( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -138,7 +138,7 @@ void bmw512_2way_close( bmw512_2way_context *ctx, void *dst );
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// BMW-512 4 way 64
|
||||
// BMW-512 64 bit 4 way
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16];
|
||||
@@ -149,7 +149,6 @@ typedef struct {
|
||||
|
||||
typedef bmw_4way_big_context bmw512_4way_context;
|
||||
|
||||
|
||||
void bmw512_4way_init(void *cc);
|
||||
|
||||
void bmw512_4way_update(void *cc, const void *data, size_t len);
|
||||
@@ -164,6 +163,7 @@ void bmw512_4way_addbits_and_close(
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
// BMW-512 64 bit 8 way
|
||||
typedef struct {
|
||||
__m512i buf[16];
|
||||
__m512i H[16];
|
||||
@@ -171,6 +171,8 @@ typedef struct {
|
||||
uint64_t bit_count;
|
||||
} bmw512_8way_context __attribute__((aligned(128)));
|
||||
|
||||
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
|
||||
size_t len );
|
||||
void bmw512_8way_init( bmw512_8way_context *ctx );
|
||||
void bmw512_8way_update( bmw512_8way_context *ctx, const void *data,
|
||||
size_t len );
|
||||
|
@@ -46,7 +46,7 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
@@ -99,7 +99,7 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -1507,6 +1507,93 @@ void bmw512_8way_close( bmw512_8way_context *ctx, void *dst )
|
||||
casti_m512i( dst, u ) = h1[ v ];
|
||||
}
|
||||
|
||||
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m512i *vdata = (__m512i*)data;
|
||||
__m512i *buf = ctx->buf;
|
||||
__m512i htmp[16];
|
||||
__m512i *H = ctx->H;
|
||||
__m512i *h2 = htmp;
|
||||
uint64_t bit_count = len * 8;
|
||||
size_t ptr = 0;
|
||||
const int buf_size = 128; // bytes of one lane, compatible with len
|
||||
|
||||
// Init
|
||||
|
||||
H[ 0] = m512_const1_64( 0x8081828384858687 );
|
||||
H[ 1] = m512_const1_64( 0x88898A8B8C8D8E8F );
|
||||
H[ 2] = m512_const1_64( 0x9091929394959697 );
|
||||
H[ 3] = m512_const1_64( 0x98999A9B9C9D9E9F );
|
||||
H[ 4] = m512_const1_64( 0xA0A1A2A3A4A5A6A7 );
|
||||
H[ 5] = m512_const1_64( 0xA8A9AAABACADAEAF );
|
||||
H[ 6] = m512_const1_64( 0xB0B1B2B3B4B5B6B7 );
|
||||
H[ 7] = m512_const1_64( 0xB8B9BABBBCBDBEBF );
|
||||
H[ 8] = m512_const1_64( 0xC0C1C2C3C4C5C6C7 );
|
||||
H[ 9] = m512_const1_64( 0xC8C9CACBCCCDCECF );
|
||||
H[10] = m512_const1_64( 0xD0D1D2D3D4D5D6D7 );
|
||||
H[11] = m512_const1_64( 0xD8D9DADBDCDDDEDF );
|
||||
H[12] = m512_const1_64( 0xE0E1E2E3E4E5E6E7 );
|
||||
H[13] = m512_const1_64( 0xE8E9EAEBECEDEEEF );
|
||||
H[14] = m512_const1_64( 0xF0F1F2F3F4F5F6F7 );
|
||||
H[15] = m512_const1_64( 0xF8F9FAFBFCFDFEFF );
|
||||
|
||||
// Update
|
||||
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_512( buf + (ptr>>3), vdata, clen >> 3 );
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
__m512i *ht;
|
||||
compress_big_8way( buf, H, h2 );
|
||||
ht = H;
|
||||
H = h2;
|
||||
h2 = ht;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
if ( H != ctx->H )
|
||||
memcpy_512( ctx->H, H, 16 );
|
||||
|
||||
// Close
|
||||
{
|
||||
__m512i h1[16], h2[16];
|
||||
size_t u, v;
|
||||
|
||||
buf[ ptr>>3 ] = m512_const1_64( 0x80 );
|
||||
ptr += 8;
|
||||
|
||||
if ( ptr > (buf_size - 8) )
|
||||
{
|
||||
memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||
compress_big_8way( buf, H, h1 );
|
||||
ptr = 0;
|
||||
H = h1;
|
||||
}
|
||||
memset_zero_512( buf + (ptr>>3), (buf_size - 8 - ptr) >> 3 );
|
||||
buf[ (buf_size - 8) >> 3 ] = _mm512_set1_epi64( bit_count );
|
||||
compress_big_8way( buf, H, h2 );
|
||||
for ( u = 0; u < 16; u ++ )
|
||||
buf[ u ] = h2[ u ];
|
||||
compress_big_8way( buf, final_b8, h1 );
|
||||
for (u = 0, v = 8; u < 8; u ++, v ++)
|
||||
casti_m512i( out, u ) = h1[ v ];
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@@ -1,5 +1,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if !defined(BMW512_8WAY) && !defined(BMW512_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
@@ -50,4 +52,4 @@ int scanhash_bmw512( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -48,6 +48,8 @@ extern "C"{
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0x00010203), SPH_C32(0x04050607),
|
||||
SPH_C32(0x08090A0B), SPH_C32(0x0C0D0E0F),
|
||||
@@ -70,6 +72,8 @@ static const sph_u32 IV256[] = {
|
||||
SPH_C32(0x78797A7B), SPH_C32(0x7C7D7E7F)
|
||||
};
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
static const sph_u64 IV384[] = {
|
||||
@@ -135,6 +139,8 @@ static const sph_u64 IV512[] = {
|
||||
#define M16_30 14, 15, 1, 2, 5, 8, 9
|
||||
#define M16_31 15, 16, 2, 3, 6, 9, 10
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
#define ss0(x) (((x) >> 1) ^ SPH_T32((x) << 3) \
|
||||
^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19))
|
||||
#define ss1(x) (((x) >> 1) ^ SPH_T32((x) << 2) \
|
||||
@@ -189,6 +195,8 @@ static const sph_u64 IV512[] = {
|
||||
#define expand2s_(qf, mf, hf, i16, ix, iy) \
|
||||
expand2s_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
#define sb0(x) (((x) >> 1) ^ SPH_T64((x) << 3) \
|
||||
@@ -291,6 +299,8 @@ static const sph_u64 Kb_tab[] = {
|
||||
tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
|
||||
op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
#define Ws0 MAKE_W(SPH_T32, 5, -, 7, +, 10, +, 13, +, 14)
|
||||
#define Ws1 MAKE_W(SPH_T32, 6, -, 8, +, 11, +, 14, -, 15)
|
||||
#define Ws2 MAKE_W(SPH_T32, 0, +, 7, +, 9, -, 12, +, 15)
|
||||
@@ -407,6 +417,8 @@ static const sph_u64 Kb_tab[] = {
|
||||
|
||||
#define Qs(j) (qt[j])
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
#define Wb0 MAKE_W(SPH_T64, 5, -, 7, +, 10, +, 13, +, 14)
|
||||
@@ -557,7 +569,6 @@ static const sph_u64 Kb_tab[] = {
|
||||
+ ((xl >> 2) ^ qf(22) ^ qf(15))); \
|
||||
} while (0)
|
||||
|
||||
#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)
|
||||
|
||||
#if SPH_64
|
||||
|
||||
@@ -565,6 +576,10 @@ static const sph_u64 Kb_tab[] = {
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
#define FOLDs FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)
|
||||
|
||||
static void
|
||||
compress_small(const unsigned char *data, const sph_u32 h[16], sph_u32 dh[16])
|
||||
{
|
||||
@@ -711,6 +726,8 @@ bmw32_close(sph_bmw_small_context *sc, unsigned ub, unsigned n,
|
||||
sph_enc32le(out + 4 * u, h1[v]);
|
||||
}
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
static void
|
||||
@@ -840,6 +857,8 @@ bmw64_close(sph_bmw_big_context *sc, unsigned ub, unsigned n,
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw224_init(void *cc)
|
||||
@@ -898,6 +917,8 @@ sph_bmw256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
// sph_bmw256_init(cc);
|
||||
}
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/* see sph_bmw.h */
|
||||
|
@@ -77,6 +77,9 @@ extern "C"{
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
@@ -102,6 +105,8 @@ typedef sph_bmw_small_context sph_bmw224_context;
|
||||
*/
|
||||
typedef sph_bmw_small_context sph_bmw256_context;
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
@@ -137,6 +142,8 @@ typedef sph_bmw_big_context sph_bmw512_context;
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/**
|
||||
* Initialize a BMW-224 context. This process performs no memory allocation.
|
||||
*
|
||||
@@ -227,6 +234,8 @@ void sph_bmw256_close(void *cc, void *dst);
|
||||
void sph_bmw256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
|
@@ -1,368 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__arm__) || defined(_MSC_VER)
|
||||
#ifndef NOASM
|
||||
#define NOASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "crypto/int-util.h"
|
||||
#include "crypto/hash-ops.h"
|
||||
|
||||
#if USE_INT128
|
||||
|
||||
#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
|
||||
typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
|
||||
#elif defined (_MSC_VER)
|
||||
/* only for mingw64 on windows */
|
||||
#undef USE_INT128
|
||||
#define USE_INT128 (0)
|
||||
#else
|
||||
typedef __uint128_t uint128_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define LITE 1
|
||||
#if LITE /* cryptonight-light */
|
||||
#define MEMORY (1 << 20)
|
||||
#define ITER (1 << 19)
|
||||
#else
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define ITER (1 << 20)
|
||||
#endif
|
||||
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
static void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
static void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
}
|
||||
|
||||
static void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SUCCESS == r));
|
||||
}
|
||||
|
||||
static void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SKEIN_SUCCESS == r));
|
||||
}
|
||||
|
||||
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#if !defined(_MSC_VER) && !defined(NOASM)
|
||||
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#else
|
||||
#define fast_aesb_single_round aesb_single_round
|
||||
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
|
||||
#endif
|
||||
|
||||
#if defined(NOASM) || !defined(__x86_64__)
|
||||
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
|
||||
// multiplier = ab = a * 2^32 + b
|
||||
// multiplicand = cd = c * 2^32 + d
|
||||
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
|
||||
uint64_t a = hi_dword(multiplier);
|
||||
uint64_t b = lo_dword(multiplier);
|
||||
uint64_t c = hi_dword(multiplicand);
|
||||
uint64_t d = lo_dword(multiplicand);
|
||||
|
||||
uint64_t ac = a * c;
|
||||
uint64_t ad = a * d;
|
||||
uint64_t bc = b * c;
|
||||
uint64_t bd = b * d;
|
||||
|
||||
uint64_t adbc = ad + bc;
|
||||
uint64_t adbc_carry = adbc < ad ? 1 : 0;
|
||||
|
||||
// multiplier * multiplicand = product_hi * 2^64 + product_lo
|
||||
uint64_t product_lo = bd + (adbc << 32);
|
||||
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
|
||||
*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
|
||||
assert(ac <= *product_hi);
|
||||
|
||||
return product_lo;
|
||||
}
|
||||
#else
|
||||
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
|
||||
#endif
|
||||
|
||||
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
|
||||
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
|
||||
};
|
||||
|
||||
|
||||
static inline size_t e2i(const uint8_t* a) {
|
||||
#if !LITE
|
||||
return ((uint32_t *)a)[0] & 0x1FFFF0;
|
||||
#else
|
||||
return ((uint32_t *)a)[0] & 0xFFFF0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
|
||||
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
|
||||
hi += ((uint64_t*) c)[0];
|
||||
|
||||
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
|
||||
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
|
||||
((uint64_t*) dst)[0] = hi;
|
||||
((uint64_t*) dst)[1] = lo;
|
||||
}
|
||||
|
||||
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) a) ^= *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
|
||||
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
|
||||
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
struct cryptonight_ctx {
|
||||
uint8_t _ALIGN(16) long_state[MEMORY];
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];
|
||||
uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
|
||||
oaes_ctx* aes_ctx;
|
||||
};
|
||||
|
||||
static void cryptolight_hash_ctx(void* output, const void* input, int len, struct cryptonight_ctx* ctx)
|
||||
{
|
||||
len = 76;
|
||||
hash_process(&ctx->state.hs, (const uint8_t*) input, len);
|
||||
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
size_t i, j;
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
|
||||
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
|
||||
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx->a);
|
||||
aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
|
||||
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx->a);
|
||||
aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
|
||||
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
hash_permutation(&ctx->state.hs);
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx->aes_ctx);
|
||||
}
|
||||
|
||||
void cryptolight_hash(void* output, const void* input, int len) {
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
cryptolight_hash_ctx(output, input, len, ctx);
|
||||
free(ctx);
|
||||
}
|
||||
|
||||
#if defined(__AES__)
|
||||
|
||||
static void cryptolight_hash_ctx_aes_ni(void* output, const void* input,
|
||||
int len, struct cryptonight_ctx* ctx)
|
||||
{
|
||||
hash_process(&ctx->state.hs, (const uint8_t*)input, len);
|
||||
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
size_t i, j;
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
|
||||
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
|
||||
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx->a);
|
||||
fast_aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
|
||||
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx->a);
|
||||
fast_aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
|
||||
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
|
||||
}
|
||||
|
||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &ctx->long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &ctx->long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &ctx->long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &ctx->long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &ctx->long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &ctx->long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &ctx->long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &ctx->long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
fast_aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||
hash_permutation(&ctx->state.hs);
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx->aes_ctx);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int scanhash_cryptolight( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
|
||||
uint32_t n = *nonceptr - 1;
|
||||
const uint32_t first_nonce = n + 1;
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
uint32_t _ALIGN(32) hash[HASH_SIZE / 4];
|
||||
int thr_id = mythr->id;
|
||||
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
#if defined(__AES__)
|
||||
do {
|
||||
*nonceptr = ++n;
|
||||
cryptolight_hash_ctx_aes_ni(hash, pdata, 76, ctx);
|
||||
if (unlikely(hash[7] < ptarget[7])) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
free(ctx);
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
#else
|
||||
do {
|
||||
*nonceptr = ++n;
|
||||
cryptolight_hash_ctx(hash, pdata, 76, ctx);
|
||||
if (unlikely(hash[7] < ptarget[7])) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
free(ctx);
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
#endif
|
||||
free(ctx);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_cryptolight_algo( algo_gate_t* gate )
|
||||
{
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptolight;
|
||||
gate->hash = (void*)&cryptolight_hash;
|
||||
gate->hash_suw = (void*)&cryptolight_hash;
|
||||
return true;
|
||||
};
|
||||
|
@@ -1,357 +0,0 @@
|
||||
#if defined(__AES__)
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <memory.h>
|
||||
#include "cryptonight.h"
|
||||
#include "miner.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include <immintrin.h>
|
||||
|
||||
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
|
||||
{
|
||||
__m128i tmp4;
|
||||
*tmp2 = _mm_shuffle_epi32(*tmp2, 0xFF);
|
||||
tmp4 = _mm_slli_si128(*tmp1, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, tmp4);
|
||||
*tmp1 = _mm_xor_si128(*tmp1, *tmp2);
|
||||
}
|
||||
|
||||
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
{
|
||||
__m128i tmp2, tmp4;
|
||||
|
||||
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
|
||||
tmp2 = _mm_shuffle_epi32(tmp4, 0xAA);
|
||||
tmp4 = _mm_slli_si128(*tmp3, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
|
||||
}
|
||||
|
||||
// Special thanks to Intel for helping me
|
||||
// with ExpandAESKey256() and its subroutines
|
||||
static inline void ExpandAESKey256(char *keybuf)
|
||||
{
|
||||
__m128i tmp1, tmp2, tmp3, *keys;
|
||||
|
||||
keys = (__m128i *)keybuf;
|
||||
|
||||
tmp1 = _mm_load_si128((__m128i *)keybuf);
|
||||
tmp3 = _mm_load_si128((__m128i *)(keybuf+0x10));
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x01);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[2] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[3] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x02);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[4] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[5] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x04);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[6] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[7] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x08);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[8] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[9] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x10);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[10] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[11] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x20);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[12] = tmp1;
|
||||
ExpandAESKey256_sub2(&tmp1, &tmp3);
|
||||
keys[13] = tmp3;
|
||||
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[14] = tmp1;
|
||||
}
|
||||
|
||||
// align to 64 byte cache line
|
||||
typedef struct
|
||||
{
|
||||
uint8_t long_state[MEMORY] __attribute((aligned(64)));
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(64)));
|
||||
uint64_t a[AES_BLOCK_SIZE >> 3] __attribute__((aligned(64)));
|
||||
uint64_t b[AES_BLOCK_SIZE >> 3] __attribute__((aligned(64)));
|
||||
uint8_t c[AES_BLOCK_SIZE] __attribute__((aligned(64)));
|
||||
} cryptonight_ctx;
|
||||
|
||||
static __thread cryptonight_ctx ctx;
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
{
|
||||
uint8_t ExpandedKey[256] __attribute__((aligned(64)));
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
size_t i, j;
|
||||
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
|
||||
if ( cryptonightV7 && len < 43 )
|
||||
return;
|
||||
|
||||
const uint64_t tweak = cryptonightV7
|
||||
? *((const uint64_t*) (((const uint8_t*)input) + 35))
|
||||
^ ctx.state.hs.w[24] : 0;
|
||||
|
||||
memcpy( ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||
|
||||
longoutput = (__m128i*)ctx.long_state;
|
||||
xmminput = (__m128i*)ctx.text;
|
||||
expkey = (__m128i*)ExpandedKey;
|
||||
|
||||
// prefetch expkey, xmminput and enough longoutput for 4 iterations
|
||||
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
||||
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
||||
for ( i = 0; i < 64; i += 16 )
|
||||
{
|
||||
__builtin_prefetch( longoutput + i, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 4, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 8, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 12, 1, 0 );
|
||||
}
|
||||
|
||||
// n-4 iterations
|
||||
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
|
||||
i += INIT_SIZE_M128I )
|
||||
{
|
||||
// prefetch 4 iterations ahead.
|
||||
__builtin_prefetch( longoutput + i + 64, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 68, 1, 0 );
|
||||
|
||||
for ( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
|
||||
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
|
||||
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
|
||||
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
|
||||
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
|
||||
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
|
||||
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
|
||||
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
|
||||
}
|
||||
// last 4 iterations
|
||||
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||
{
|
||||
for ( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
|
||||
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
|
||||
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
|
||||
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
|
||||
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
|
||||
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
|
||||
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
|
||||
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
|
||||
}
|
||||
|
||||
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
|
||||
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
|
||||
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
|
||||
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
|
||||
|
||||
uint64_t a[2] __attribute((aligned(16))),
|
||||
b[2] __attribute((aligned(16))),
|
||||
c[2] __attribute((aligned(16)));
|
||||
a[0] = ctx.a[0];
|
||||
a[1] = ctx.a[1];
|
||||
__m128i b_x = _mm_load_si128( (__m128i*)ctx.b );
|
||||
__m128i a_x = _mm_load_si128( (__m128i*)a );
|
||||
__m128i* lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||
__m128i c_x = _mm_load_si128( lsa );
|
||||
uint64_t *nextblock;
|
||||
uint64_t hi, lo;
|
||||
|
||||
// n-1 iterations
|
||||
for( i = 0; __builtin_expect( i < 0x7ffff, 1 ); i++ )
|
||||
{
|
||||
c_x = _mm_aesenc_si128( c_x, a_x );
|
||||
_mm_store_si128( (__m128i*)c, c_x );
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
|
||||
__asm__( "mulq %3\n\t"
|
||||
: "=d" ( hi ),
|
||||
"=a" ( lo )
|
||||
: "%a" ( c[0] ),
|
||||
"rm" ( b[0] )
|
||||
: "cc" );
|
||||
|
||||
b_x = c_x;
|
||||
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
nextblock[0] = a[0];
|
||||
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
|
||||
lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||
a_x = _mm_load_si128( (__m128i*)a );
|
||||
c_x = _mm_load_si128( lsa );
|
||||
}
|
||||
// abreviated nth iteration
|
||||
c_x = _mm_aesenc_si128( c_x, a_x );
|
||||
_mm_store_si128( (__m128i*)c, c_x );
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
__asm__( "mulq %3\n\t"
|
||||
: "=d" ( hi ),
|
||||
"=a" ( lo )
|
||||
: "%a" ( c[0] ),
|
||||
"rm" ( b[0] )
|
||||
: "cc" );
|
||||
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
nextblock[0] = a[0];
|
||||
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
|
||||
memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||
|
||||
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
|
||||
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
||||
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
||||
for ( i = 0; i < 64; i += 16 )
|
||||
{
|
||||
_mm_prefetch( longoutput + i, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 8, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 12, _MM_HINT_T0 );
|
||||
}
|
||||
_mm_prefetch( expkey, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
||||
|
||||
// n-4 iterations
|
||||
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
|
||||
i += INIT_SIZE_M128I )
|
||||
{
|
||||
// stay 4 iterations ahead.
|
||||
_mm_prefetch( longoutput + i + 64, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 68, _MM_HINT_T0 );
|
||||
|
||||
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
|
||||
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
|
||||
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
|
||||
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
|
||||
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
|
||||
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
|
||||
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
|
||||
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
|
||||
|
||||
for( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
}
|
||||
// last 4 iterations
|
||||
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||
{
|
||||
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
|
||||
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
|
||||
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
|
||||
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
|
||||
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
|
||||
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
|
||||
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
|
||||
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
|
||||
|
||||
for( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
}
|
||||
|
||||
memcpy( ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
|
||||
}
|
||||
#endif
|
@@ -1,127 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
// Modified for CPUminer by Lucas Jones
|
||||
|
||||
#include "cpuminer-config.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
#else
|
||||
#include "crypto/c_groestl.h"
|
||||
#endif
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "cryptonight.h"
|
||||
|
||||
/*
|
||||
#if defined __unix__ && (!defined __APPLE__)
|
||||
#include <sys/mman.h>
|
||||
#elif defined _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
*/
|
||||
|
||||
void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
#if defined(__AES__)
|
||||
hashState_groestl256 ctx;
|
||||
init_groestl256( &ctx, 32 );
|
||||
update_and_final_groestl256( &ctx, output, input, len * 8 );
|
||||
#else
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
#endif
|
||||
}
|
||||
|
||||
void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
jh_hash(32 * 8, input, 8 * len, (uint8_t*)output);
|
||||
}
|
||||
|
||||
void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
skein_hash(8 * 32, input, 8 * len, (uint8_t*)output);
|
||||
}
|
||||
|
||||
void (* const extra_hashes[4])( const void *, size_t, char *) =
|
||||
{ do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash };
|
||||
|
||||
void cryptonight_hash( void *restrict output, const void *input, int len )
|
||||
{
|
||||
#if defined(__AES__)
|
||||
cryptonight_hash_aes( output, input, len );
|
||||
#else
|
||||
cryptonight_hash_ctx ( output, input, len );
|
||||
#endif
|
||||
}
|
||||
|
||||
void cryptonight_hash_suw( void *restrict output, const void *input )
|
||||
{
|
||||
#if defined(__AES__)
|
||||
cryptonight_hash_aes( output, input, 76 );
|
||||
#else
|
||||
cryptonight_hash_ctx ( output, input, 76 );
|
||||
#endif
|
||||
}
|
||||
|
||||
bool cryptonightV7 = false;
|
||||
|
||||
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
|
||||
uint32_t n = *nonceptr - 1;
|
||||
const uint32_t first_nonce = n + 1;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t hash[32 / 4] __attribute__((aligned(32)));
|
||||
|
||||
// if ( ( cryptonightV7 && ( *(uint8_t*)pdata < 7 ) )
|
||||
// || ( !cryptonightV7 && ( *(uint8_t*)pdata == 7 ) ) )
|
||||
// applog(LOG_WARNING,"Cryptonight variant mismatch, shares may be rejected.");
|
||||
|
||||
do
|
||||
{
|
||||
*nonceptr = ++n;
|
||||
cryptonight_hash( hash, pdata, 76 );
|
||||
if (unlikely( hash[7] < Htarg ))
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
// work_set_target_ratio( work, hash );
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_cryptonight_algo( algo_gate_t* gate )
|
||||
{
|
||||
cryptonightV7 = false;
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
gate->hash = (void*)&cryptonight_hash;
|
||||
gate->hash_suw = (void*)&cryptonight_hash_suw;
|
||||
return true;
|
||||
};
|
||||
|
||||
bool register_cryptonightv7_algo( algo_gate_t* gate )
|
||||
{
|
||||
cryptonightV7 = true;
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
gate->hash = (void*)&cryptonight_hash;
|
||||
gate->hash_suw = (void*)&cryptonight_hash_suw;
|
||||
return true;
|
||||
};
|
||||
|
@@ -1,310 +0,0 @@
|
||||
// Copyright (c) 2012-2013 The Cryptonote developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
// Modified for CPUminer by Lucas Jones
|
||||
|
||||
#include "miner.h"
|
||||
#include <memory.h>
|
||||
|
||||
#if defined(__arm__) || defined(_MSC_VER)
|
||||
#ifndef NOASM
|
||||
#define NOASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "crypto/int-util.h"
|
||||
//#include "crypto/hash-ops.h"
|
||||
#include "cryptonight.h"
|
||||
|
||||
#if USE_INT128
|
||||
|
||||
#if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
|
||||
typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
|
||||
#elif defined (_MSC_VER)
|
||||
/* only for mingw64 on windows */
|
||||
#undef USE_INT128
|
||||
#define USE_INT128 (0)
|
||||
#else
|
||||
typedef __uint128_t uint128_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define LITE 0
|
||||
#if LITE /* cryptonight-light */
|
||||
#define MEMORY (1 << 20)
|
||||
#define ITER (1 << 19)
|
||||
#else
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define ITER (1 << 20)
|
||||
#endif
|
||||
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
|
||||
|
||||
/*
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
static void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
blake256_hash((uint8_t*)output, input, len);
|
||||
}
|
||||
|
||||
static void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
}
|
||||
|
||||
static void do_jh_hash(const void* input, size_t len, char* output) {
|
||||
int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SUCCESS == r));
|
||||
}
|
||||
|
||||
static void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SKEIN_SUCCESS == r));
|
||||
}
|
||||
*/
|
||||
|
||||
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#if !defined(_MSC_VER) && !defined(NOASM)
|
||||
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
#else
|
||||
#define fast_aesb_single_round aesb_single_round
|
||||
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(NOASM) || !defined(__x86_64__)
|
||||
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
|
||||
// multiplier = ab = a * 2^32 + b
|
||||
// multiplicand = cd = c * 2^32 + d
|
||||
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
|
||||
uint64_t a = hi_dword(multiplier);
|
||||
uint64_t b = lo_dword(multiplier);
|
||||
uint64_t c = hi_dword(multiplicand);
|
||||
uint64_t d = lo_dword(multiplicand);
|
||||
|
||||
uint64_t ac = a * c;
|
||||
uint64_t ad = a * d;
|
||||
uint64_t bc = b * c;
|
||||
uint64_t bd = b * d;
|
||||
|
||||
uint64_t adbc = ad + bc;
|
||||
uint64_t adbc_carry = adbc < ad ? 1 : 0;
|
||||
|
||||
// multiplier * multiplicand = product_hi * 2^64 + product_lo
|
||||
uint64_t product_lo = bd + (adbc << 32);
|
||||
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
|
||||
*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
|
||||
assert(ac <= *product_hi);
|
||||
|
||||
return product_lo;
|
||||
}
|
||||
#else
|
||||
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
|
||||
#endif
|
||||
|
||||
/*
|
||||
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
|
||||
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
|
||||
};
|
||||
*/
|
||||
|
||||
static inline size_t e2i(const uint8_t* a) {
|
||||
#if !LITE
|
||||
return ((uint32_t *)a)[0] & 0x1FFFF0;
|
||||
#else
|
||||
return ((uint32_t *)a)[0] & 0xFFFF0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mul_sum_xor_dst( const uint8_t* a, uint8_t* c, uint8_t* dst,
|
||||
const uint64_t tweak )
|
||||
{
|
||||
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
|
||||
hi += ((uint64_t*) c)[0];
|
||||
|
||||
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
|
||||
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
|
||||
((uint64_t*) dst)[0] = hi;
|
||||
((uint64_t*) dst)[1] = cryptonightV7 ? lo ^ tweak : lo;
|
||||
}
|
||||
|
||||
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) a) ^= *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
|
||||
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
|
||||
#if USE_INT128
|
||||
*((uint128_t*) dst) = *((uint128_t*) a) ^ *((uint128_t*) b);
|
||||
#else
|
||||
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
|
||||
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
uint8_t _ALIGN(16) long_state[MEMORY];
|
||||
union cn_slow_hash_state state;
|
||||
uint8_t _ALIGN(16) text[INIT_SIZE_BYTE];
|
||||
uint8_t _ALIGN(16) a[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) b[AES_BLOCK_SIZE];
|
||||
uint8_t _ALIGN(16) c[AES_BLOCK_SIZE];
|
||||
oaes_ctx* aes_ctx;
|
||||
} cryptonight_ctx;
|
||||
|
||||
static __thread cryptonight_ctx ctx;
|
||||
|
||||
void cryptonight_hash_ctx(void* output, const void* input, int len)
|
||||
{
|
||||
// hash_process(&ctx.state.hs, (const uint8_t*) input, len);
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
|
||||
if ( cryptonightV7 && len < 43 )
|
||||
return;
|
||||
const uint64_t tweak = cryptonightV7
|
||||
? *((const uint64_t*) (((const uint8_t*)input) + 35))
|
||||
^ ctx.state.hs.w[24] : 0;
|
||||
|
||||
ctx.aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
|
||||
__builtin_prefetch( ctx.text, 0, 3 );
|
||||
__builtin_prefetch( ctx.text + 64, 0, 3 );
|
||||
__builtin_prefetch( ctx.long_state, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 64, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 128, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 192, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 256, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 320, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 384, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 448, 1, 0 );
|
||||
|
||||
size_t i, j;
|
||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
||||
|
||||
oaes_key_import_data(ctx.aes_ctx, ctx.state.hs.b, AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
|
||||
__builtin_prefetch( ctx.long_state + i + 512, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + i + 576, 1, 0 );
|
||||
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 0], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 1], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 2], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 3], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 4], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 5], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 6], ctx.aes_ctx->key->exp_data);
|
||||
aesb_pseudo_round_mut(&ctx.text[AES_BLOCK_SIZE * 7], ctx.aes_ctx->key->exp_data);
|
||||
memcpy(&ctx.long_state[i], ctx.text, INIT_SIZE_BYTE);
|
||||
}
|
||||
|
||||
xor_blocks_dst(&ctx.state.k[0], &ctx.state.k[32], ctx.a);
|
||||
xor_blocks_dst(&ctx.state.k[16], &ctx.state.k[48], ctx.b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i)
|
||||
{
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.c, ctx.a);
|
||||
xor_blocks_dst(ctx.c, ctx.b, &ctx.long_state[j]);
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
|
||||
const uint8_t tmp = lsa[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx.c, ctx.a, &ctx.long_state[e2i(ctx.c)], tweak );
|
||||
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.b, ctx.a);
|
||||
xor_blocks_dst(ctx.b, ctx.c, &ctx.long_state[j]);
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
|
||||
const uint8_t tmp = lsa[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx.b, ctx.a, &ctx.long_state[e2i(ctx.b)], tweak );
|
||||
|
||||
}
|
||||
|
||||
__builtin_prefetch( ctx.text, 0, 3 );
|
||||
__builtin_prefetch( ctx.text + 64, 0, 3 );
|
||||
__builtin_prefetch( ctx.long_state, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 64, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 128, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 192, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 256, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 320, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 384, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + 448, 1, 0 );
|
||||
|
||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
||||
oaes_key_import_data(ctx.aes_ctx, &ctx.state.hs.b[32], AES_KEY_SIZE);
|
||||
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
|
||||
|
||||
__builtin_prefetch( ctx.long_state + i + 512, 1, 0 );
|
||||
__builtin_prefetch( ctx.long_state + i + 576, 1, 0 );
|
||||
|
||||
xor_blocks(&ctx.text[0 * AES_BLOCK_SIZE], &ctx.long_state[i + 0 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[0 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[1 * AES_BLOCK_SIZE], &ctx.long_state[i + 1 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[1 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[2 * AES_BLOCK_SIZE], &ctx.long_state[i + 2 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[2 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[3 * AES_BLOCK_SIZE], &ctx.long_state[i + 3 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[3 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[4 * AES_BLOCK_SIZE], &ctx.long_state[i + 4 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[4 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[5 * AES_BLOCK_SIZE], &ctx.long_state[i + 5 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[5 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[6 * AES_BLOCK_SIZE], &ctx.long_state[i + 6 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[6 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
xor_blocks(&ctx.text[7 * AES_BLOCK_SIZE], &ctx.long_state[i + 7 * AES_BLOCK_SIZE]);
|
||||
aesb_pseudo_round_mut(&ctx.text[7 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||
// hash_permutation(&ctx.state.hs);
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx.aes_ctx);
|
||||
}
|
||||
|
@@ -1,51 +0,0 @@
|
||||
#ifndef __CRYPTONIGHT_H_INCLUDED
|
||||
#define __CRYPTONIGHT_H_INCLUDED
|
||||
|
||||
#include <stddef.h>
|
||||
#include "crypto/oaes_lib.h"
|
||||
#include "miner.h"
|
||||
|
||||
#define MEMORY (1 << 21) /* 2 MiB */
|
||||
#define MEMORY_M128I (MEMORY >> 4) // 2 MiB / 16 = 128 ki * __m128i
|
||||
#define ITER (1 << 20)
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define AES_KEY_SIZE 32 /*16*/
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128
|
||||
#define INIT_SIZE_M128I (INIT_SIZE_BYTE >> 4) // 8
|
||||
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union hash_state {
|
||||
uint8_t b[200];
|
||||
uint64_t w[25];
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
struct {
|
||||
uint8_t k[64];
|
||||
uint8_t init[INIT_SIZE_BYTE];
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
void do_blake_hash(const void* input, size_t len, char* output);
|
||||
void do_groestl_hash(const void* input, size_t len, char* output);
|
||||
void do_jh_hash(const void* input, size_t len, char* output);
|
||||
void do_skein_hash(const void* input, size_t len, char* output);
|
||||
void cryptonight_hash_ctx(void* output, const void* input, int len);
|
||||
void keccakf(uint64_t st[25], int rounds);
|
||||
extern void (* const extra_hashes[4])(const void *, size_t, char *);
|
||||
|
||||
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len );
|
||||
|
||||
extern bool cryptonightV7;
|
||||
|
||||
#endif
|
||||
|
@@ -179,14 +179,6 @@ int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
|
||||
sp->rounds = 16;
|
||||
sp->pos = 0;
|
||||
|
||||
h[ 0] = m512_const1_128( iv[0] );
|
||||
h[ 1] = m512_const1_128( iv[1] );
|
||||
h[ 2] = m512_const1_128( iv[2] );
|
||||
h[ 3] = m512_const1_128( iv[3] );
|
||||
h[ 4] = m512_const1_128( iv[4] );
|
||||
h[ 5] = m512_const1_128( iv[5] );
|
||||
h[ 6] = m512_const1_128( iv[6] );
|
||||
h[ 7] = m512_const1_128( iv[7] );
|
||||
h[ 0] = m512_const1_128( iv[0] );
|
||||
h[ 1] = m512_const1_128( iv[1] );
|
||||
h[ 2] = m512_const1_128( iv[2] );
|
||||
@@ -447,14 +439,6 @@ int cube_2way_full( cube_2way_context *sp, void *output, int hashbitlen,
|
||||
sp->rounds = 16;
|
||||
sp->pos = 0;
|
||||
|
||||
h[ 0] = m256_const1_128( iv[0] );
|
||||
h[ 1] = m256_const1_128( iv[1] );
|
||||
h[ 2] = m256_const1_128( iv[2] );
|
||||
h[ 3] = m256_const1_128( iv[3] );
|
||||
h[ 4] = m256_const1_128( iv[4] );
|
||||
h[ 5] = m256_const1_128( iv[5] );
|
||||
h[ 6] = m256_const1_128( iv[6] );
|
||||
h[ 7] = m256_const1_128( iv[7] );
|
||||
h[ 0] = m256_const1_128( iv[0] );
|
||||
h[ 1] = m256_const1_128( iv[1] );
|
||||
h[ 2] = m256_const1_128( iv[2] );
|
||||
|
@@ -28,6 +28,27 @@ int cube_4way_update_close( cube_4way_context *sp, void *output,
|
||||
int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
|
||||
const void *data, size_t size );
|
||||
|
||||
int cube_4x256_full( cube_4way_context *sp, void *output, int hashbitlen,
|
||||
const void *data, size_t size );
|
||||
|
||||
#define cube512_4way_init( sp ) cube_4way_update( sp, 512 )
|
||||
#define cube512_4way_update cube_4way_update
|
||||
#define cube512_4way_update_close cube_4way_update
|
||||
#define cube512_4way_close cube_4way_update
|
||||
#define cube512_4way_full( sp, output, data, size ) \
|
||||
cube_4way_full( sp, output, 512, data, size )
|
||||
#define cube512_4x256_full( sp, output, data, size ) \
|
||||
cube_4x256_full( sp, output, 512, data, size )
|
||||
|
||||
#define cube256_4way_init( sp ) cube_4way_update( sp, 256 )
|
||||
#define cube256_4way_update cube_4way_update
|
||||
#define cube256_4way_update_close cube_4way_update
|
||||
#define cube256_4way_close cube_4way_update
|
||||
#define cube256_4way_full( sp, output, data, size ) \
|
||||
cube_4way_full( sp, output, 256, data, size )
|
||||
#define cube256_4x256_full( sp, output, data, size ) \
|
||||
cube_4x256_full( sp, output, 256, data, size )
|
||||
|
||||
#endif
|
||||
|
||||
// 2x128, 2 way parallel SSE2
|
||||
|
@@ -22,18 +22,26 @@ typedef struct
|
||||
} echo_4way_context __attribute__ ((aligned (64)));
|
||||
|
||||
int echo_4way_init( echo_4way_context *state, int hashbitlen );
|
||||
|
||||
#define echo512_4way_init( state ) echo_4way_init( state, 512 )
|
||||
#define echo256_4way_init( state ) echo_4way_init( state, 256 )
|
||||
|
||||
int echo_4way_update( echo_4way_context *state, const void *data,
|
||||
unsigned int databitlen);
|
||||
#define echo512_4way_update echo_4way_update
|
||||
|
||||
int echo_close( echo_4way_context *state, void *hashval );
|
||||
#define echo512_4way_close echo_4way_close
|
||||
|
||||
int echo_4way_update_close( echo_4way_context *state, void *hashval,
|
||||
const void *data, int databitlen );
|
||||
#define echo512_4way_update_close echo_4way_update_close
|
||||
|
||||
int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
|
||||
const void *data, int datalen );
|
||||
#define echo512_4way_full( state, hashval, data, datalen ) \
|
||||
echo_4way_full( state, hashval, 512, data, datalen )
|
||||
#define echo256_4way_full( state, hashval, data, datalen ) \
|
||||
echo_4way_full( state, hashval, 256, data, datalen )
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -36,6 +36,8 @@
|
||||
|
||||
#include "sph_echo.h"
|
||||
|
||||
#if !defined(__AES__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
@@ -1028,4 +1030,5 @@ sph_echo512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif // !AES
|
||||
|
@@ -36,6 +36,8 @@
|
||||
#ifndef SPH_ECHO_H__
|
||||
#define SPH_ECHO_H__
|
||||
|
||||
#if !defined(__AES__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
@@ -316,5 +318,5 @@ void sph_echo512_addbits_and_close(
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // !AES
|
||||
#endif
|
||||
|
@@ -74,6 +74,14 @@ void sph_fugue512_close(void *cc, void *dst);
|
||||
void sph_fugue512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#define sph_fugue512_full( cc, dst, data, len ) \
|
||||
do{ \
|
||||
sph_fugue512_init( cc ); \
|
||||
sph_fugue512( cc, data, len ); \
|
||||
sph_fugue512_close( cc, dst ); \
|
||||
}while(0)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@@ -1,3 +1,6 @@
|
||||
#if !defined GROESTL_INTR_AES_H__
|
||||
#define GROESTL_INTR_AES_H__
|
||||
|
||||
/* groestl-intr-aes.h Aug 2011
|
||||
*
|
||||
* Groestl implementation with intrinsics using ssse3, sse4.1, and aes
|
||||
@@ -11,6 +14,52 @@
|
||||
#include <wmmintrin.h>
|
||||
#include "hash-groestl.h"
|
||||
|
||||
static const __m128i round_const_p[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
{ 0x7060504030201000, 0xf0e0d0c0b0a09080 },
|
||||
{ 0x7161514131211101, 0xf1e1d1c1b1a19181 },
|
||||
{ 0x7262524232221202, 0xf2e2d2c2b2a29282 },
|
||||
{ 0x7363534333231303, 0xf3e3d3c3b3a39383 },
|
||||
{ 0x7464544434241404, 0xf4e4d4c4b4a49484 },
|
||||
{ 0x7565554535251505, 0xf5e5d5c5b5a59585 },
|
||||
{ 0x7666564636261606, 0xf6e6d6c6b6a69686 },
|
||||
{ 0x7767574737271707, 0xf7e7d7c7b7a79787 },
|
||||
{ 0x7868584838281808, 0xf8e8d8c8b8a89888 },
|
||||
{ 0x7969594939291909, 0xf9e9d9c9b9a99989 },
|
||||
{ 0x7a6a5a4a3a2a1a0a, 0xfaeadacabaaa9a8a },
|
||||
{ 0x7b6b5b4b3b2b1b0b, 0xfbebdbcbbbab9b8b },
|
||||
{ 0x7c6c5c4c3c2c1c0c, 0xfcecdcccbcac9c8c },
|
||||
{ 0x7d6d5d4d3d2d1d0d, 0xfdedddcdbdad9d8d }
|
||||
};
|
||||
|
||||
static const __m128i round_const_q[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
{ 0x8f9fafbfcfdfefff, 0x0f1f2f3f4f5f6f7f },
|
||||
{ 0x8e9eaebecedeeefe, 0x0e1e2e3e4e5e6e7e },
|
||||
{ 0x8d9dadbdcdddedfd, 0x0d1d2d3d4d5d6d7d },
|
||||
{ 0x8c9cacbcccdcecfc, 0x0c1c2c3c4c5c6c7c },
|
||||
{ 0x8b9babbbcbdbebfb, 0x0b1b2b3b4b5b6b7b },
|
||||
{ 0x8a9aaabacadaeafa, 0x0a1a2a3a4a5a6a7a },
|
||||
{ 0x8999a9b9c9d9e9f9, 0x0919293949596979 },
|
||||
{ 0x8898a8b8c8d8e8f8, 0x0818283848586878 },
|
||||
{ 0x8797a7b7c7d7e7f7, 0x0717273747576777 },
|
||||
{ 0x8696a6b6c6d6e6f6, 0x0616263646566676 },
|
||||
{ 0x8595a5b5c5d5e5f5, 0x0515253545556575 },
|
||||
{ 0x8494a4b4c4d4e4f4, 0x0414243444546474 },
|
||||
{ 0x8393a3b3c3d3e3f3, 0x0313233343536373 },
|
||||
{ 0x8292a2b2c2d2e2f2, 0x0212223242526272 }
|
||||
};
|
||||
|
||||
static const __m128i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02 };
|
||||
static const __m128i SUBSH_MASK0 = { 0x0b0e0104070a0d00, 0x0306090c0f020508 };
|
||||
static const __m128i SUBSH_MASK1 = { 0x0c0f0205080b0e01, 0x04070a0d00030609 };
|
||||
static const __m128i SUBSH_MASK2 = { 0x0d000306090c0f02, 0x05080b0e0104070a };
|
||||
static const __m128i SUBSH_MASK3 = { 0x0e0104070a0d0003, 0x06090c0f0205080b };
|
||||
static const __m128i SUBSH_MASK4 = { 0x0f0205080b0e0104, 0x070a0d000306090c };
|
||||
static const __m128i SUBSH_MASK5 = { 0x000306090c0f0205, 0x080b0e0104070a0d };
|
||||
static const __m128i SUBSH_MASK6 = { 0x0104070a0d000306, 0x090c0f0205080b0e };
|
||||
static const __m128i SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003 };
|
||||
|
||||
#define tos(a) #a
|
||||
#define tostr(a) tos(a)
|
||||
|
||||
@@ -141,42 +190,6 @@
|
||||
}/*MixBytes*/
|
||||
|
||||
|
||||
static const uint64_t round_const_p[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
0x7060504030201000, 0xf0e0d0c0b0a09080,
|
||||
0x7161514131211101, 0xf1e1d1c1b1a19181,
|
||||
0x7262524232221202, 0xf2e2d2c2b2a29282,
|
||||
0x7363534333231303, 0xf3e3d3c3b3a39383,
|
||||
0x7464544434241404, 0xf4e4d4c4b4a49484,
|
||||
0x7565554535251505, 0xf5e5d5c5b5a59585,
|
||||
0x7666564636261606, 0xf6e6d6c6b6a69686,
|
||||
0x7767574737271707, 0xf7e7d7c7b7a79787,
|
||||
0x7868584838281808, 0xf8e8d8c8b8a89888,
|
||||
0x7969594939291909, 0xf9e9d9c9b9a99989,
|
||||
0x7a6a5a4a3a2a1a0a, 0xfaeadacabaaa9a8a,
|
||||
0x7b6b5b4b3b2b1b0b, 0xfbebdbcbbbab9b8b,
|
||||
0x7c6c5c4c3c2c1c0c, 0xfcecdcccbcac9c8c,
|
||||
0x7d6d5d4d3d2d1d0d, 0xfdedddcdbdad9d8d
|
||||
};
|
||||
|
||||
static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
0x8f9fafbfcfdfefff, 0x0f1f2f3f4f5f6f7f,
|
||||
0x8e9eaebecedeeefe, 0x0e1e2e3e4e5e6e7e,
|
||||
0x8d9dadbdcdddedfd, 0x0d1d2d3d4d5d6d7d,
|
||||
0x8c9cacbcccdcecfc, 0x0c1c2c3c4c5c6c7c,
|
||||
0x8b9babbbcbdbebfb, 0x0b1b2b3b4b5b6b7b,
|
||||
0x8a9aaabacadaeafa, 0x0a1a2a3a4a5a6a7a,
|
||||
0x8999a9b9c9d9e9f9, 0x0919293949596979,
|
||||
0x8898a8b8c8d8e8f8, 0x0818283848586878,
|
||||
0x8797a7b7c7d7e7f7, 0x0717273747576777,
|
||||
0x8696a6b6c6d6e6f6, 0x0616263646566676,
|
||||
0x8595a5b5c5d5e5f5, 0x0515253545556575,
|
||||
0x8494a4b4c4d4e4f4, 0x0414243444546474,
|
||||
0x8393a3b3c3d3e3f3, 0x0313233343536373,
|
||||
0x8292a2b2c2d2e2f2, 0x0212223242526272
|
||||
};
|
||||
|
||||
/* one round
|
||||
* a0-a7 = input rows
|
||||
* b0-b7 = output rows
|
||||
@@ -203,22 +216,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
|
||||
xmm8 = _mm_xor_si128( xmm8, \
|
||||
casti_m128i( round_const_p, round_counter ) ); \
|
||||
/* ShiftBytes P1024 + pre-AESENCLAST */\
|
||||
xmm8 = _mm_shuffle_epi8( xmm8, m128_const_64( 0x0306090c0f020508, \
|
||||
0x0b0e0104070a0d00 ) ); \
|
||||
xmm9 = _mm_shuffle_epi8( xmm9, m128_const_64( 0x04070a0d00030609, \
|
||||
0x0c0f0205080b0e01 ) ); \
|
||||
xmm10 = _mm_shuffle_epi8( xmm10, m128_const_64( 0x05080b0e0104070a, \
|
||||
0x0d000306090c0f02 ) ); \
|
||||
xmm11 = _mm_shuffle_epi8( xmm11, m128_const_64( 0x06090c0f0205080b, \
|
||||
0x0e0104070a0d0003 ) ); \
|
||||
xmm12 = _mm_shuffle_epi8( xmm12, m128_const_64( 0x070a0d000306090c, \
|
||||
0x0f0205080b0e0104 ) ); \
|
||||
xmm13 = _mm_shuffle_epi8( xmm13, m128_const_64( 0x080b0e0104070a0d, \
|
||||
0x000306090c0f0205 ) ); \
|
||||
xmm14 = _mm_shuffle_epi8( xmm14, m128_const_64( 0x090c0f0205080b0e, \
|
||||
0x0104070a0d000306 ) ); \
|
||||
xmm15 = _mm_shuffle_epi8( xmm15, m128_const_64( 0x0e0104070a0d0003, \
|
||||
0x06090c0f0205080b ) ); \
|
||||
xmm8 = _mm_shuffle_epi8( xmm8, SUBSH_MASK0 ); \
|
||||
xmm9 = _mm_shuffle_epi8( xmm9, SUBSH_MASK1 ); \
|
||||
xmm10 = _mm_shuffle_epi8( xmm10, SUBSH_MASK2 ); \
|
||||
xmm11 = _mm_shuffle_epi8( xmm11, SUBSH_MASK3 ); \
|
||||
xmm12 = _mm_shuffle_epi8( xmm12, SUBSH_MASK4 ); \
|
||||
xmm13 = _mm_shuffle_epi8( xmm13, SUBSH_MASK5 ); \
|
||||
xmm14 = _mm_shuffle_epi8( xmm14, SUBSH_MASK6 ); \
|
||||
xmm15 = _mm_shuffle_epi8( xmm15, SUBSH_MASK7 ); \
|
||||
/* SubBytes + MixBytes */\
|
||||
SUBMIX( xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, \
|
||||
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 ); \
|
||||
@@ -226,22 +231,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
|
||||
/* AddRoundConstant P1024 */\
|
||||
xmm0 = _mm_xor_si128( xmm0, \
|
||||
casti_m128i( round_const_p, round_counter+1 ) ); \
|
||||
xmm0 = _mm_shuffle_epi8( xmm0, m128_const_64( 0x0306090c0f020508, \
|
||||
0x0b0e0104070a0d00 ) ); \
|
||||
xmm1 = _mm_shuffle_epi8( xmm1, m128_const_64( 0x04070a0d00030609, \
|
||||
0x0c0f0205080b0e01 ) ); \
|
||||
xmm2 = _mm_shuffle_epi8( xmm2, m128_const_64( 0x05080b0e0104070a, \
|
||||
0x0d000306090c0f02 ) ); \
|
||||
xmm3 = _mm_shuffle_epi8( xmm3, m128_const_64( 0x06090c0f0205080b, \
|
||||
0x0e0104070a0d0003 ) ); \
|
||||
xmm4 = _mm_shuffle_epi8( xmm4, m128_const_64( 0x070a0d000306090c, \
|
||||
0x0f0205080b0e0104 ) ); \
|
||||
xmm5 = _mm_shuffle_epi8( xmm5, m128_const_64( 0x080b0e0104070a0d, \
|
||||
0x000306090c0f0205 ) ); \
|
||||
xmm6 = _mm_shuffle_epi8( xmm6, m128_const_64( 0x090c0f0205080b0e, \
|
||||
0x0104070a0d000306 ) ); \
|
||||
xmm7 = _mm_shuffle_epi8( xmm7, m128_const_64( 0x0e0104070a0d0003, \
|
||||
0x06090c0f0205080b ) ); \
|
||||
xmm0 = _mm_shuffle_epi8( xmm0, SUBSH_MASK0 ); \
|
||||
xmm1 = _mm_shuffle_epi8( xmm1, SUBSH_MASK1 ); \
|
||||
xmm2 = _mm_shuffle_epi8( xmm2, SUBSH_MASK2 ); \
|
||||
xmm3 = _mm_shuffle_epi8( xmm3, SUBSH_MASK3 ); \
|
||||
xmm4 = _mm_shuffle_epi8( xmm4, SUBSH_MASK4 ); \
|
||||
xmm5 = _mm_shuffle_epi8( xmm5, SUBSH_MASK5 ); \
|
||||
xmm6 = _mm_shuffle_epi8( xmm6, SUBSH_MASK6 ); \
|
||||
xmm7 = _mm_shuffle_epi8( xmm7, SUBSH_MASK7 ); \
|
||||
SUBMIX( xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, \
|
||||
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 ); \
|
||||
}\
|
||||
@@ -262,22 +259,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
|
||||
xmm15 = _mm_xor_si128( xmm15, \
|
||||
casti_m128i( round_const_q, round_counter ) ); \
|
||||
/* ShiftBytes Q1024 + pre-AESENCLAST */\
|
||||
xmm8 = _mm_shuffle_epi8( xmm8, m128_const_64( 0x04070a0d00030609, \
|
||||
0x0c0f0205080b0e01 ) ); \
|
||||
xmm9 = _mm_shuffle_epi8( xmm9, m128_const_64( 0x06090c0f0205080b, \
|
||||
0x0e0104070a0d0003 ) ); \
|
||||
xmm10 = _mm_shuffle_epi8( xmm10, m128_const_64( 0x080b0e0104070a0d, \
|
||||
0x000306090c0f0205 ) ); \
|
||||
xmm11 = _mm_shuffle_epi8( xmm11, m128_const_64( 0x0e0104070a0d0003, \
|
||||
0x06090c0f0205080b ) ); \
|
||||
xmm12 = _mm_shuffle_epi8( xmm12, m128_const_64( 0x0306090c0f020508, \
|
||||
0x0b0e0104070a0d00 ) ); \
|
||||
xmm13 = _mm_shuffle_epi8( xmm13, m128_const_64( 0x05080b0e0104070a, \
|
||||
0x0d000306090c0f02 ) ); \
|
||||
xmm14 = _mm_shuffle_epi8( xmm14, m128_const_64( 0x070a0d000306090c, \
|
||||
0x0f0205080b0e0104 ) ); \
|
||||
xmm15 = _mm_shuffle_epi8( xmm15, m128_const_64( 0x090c0f0205080b0e, \
|
||||
0x0104070a0d000306 ) ); \
|
||||
xmm8 = _mm_shuffle_epi8( xmm8, SUBSH_MASK1 ); \
|
||||
xmm9 = _mm_shuffle_epi8( xmm9, SUBSH_MASK3 ); \
|
||||
xmm10 = _mm_shuffle_epi8( xmm10, SUBSH_MASK5 ); \
|
||||
xmm11 = _mm_shuffle_epi8( xmm11, SUBSH_MASK7 ); \
|
||||
xmm12 = _mm_shuffle_epi8( xmm12, SUBSH_MASK0 ); \
|
||||
xmm13 = _mm_shuffle_epi8( xmm13, SUBSH_MASK2 ); \
|
||||
xmm14 = _mm_shuffle_epi8( xmm14, SUBSH_MASK4 ); \
|
||||
xmm15 = _mm_shuffle_epi8( xmm15, SUBSH_MASK6 ); \
|
||||
/* SubBytes + MixBytes */\
|
||||
SUBMIX( xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, \
|
||||
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6 , xmm7 ); \
|
||||
@@ -294,22 +283,14 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
|
||||
xmm7 = _mm_xor_si128( xmm7, \
|
||||
casti_m128i( round_const_q, round_counter+1 ) ); \
|
||||
/* ShiftBytes Q1024 + pre-AESENCLAST */\
|
||||
xmm0 = _mm_shuffle_epi8( xmm0, m128_const_64( 0x04070a0d00030609, \
|
||||
0x0c0f0205080b0e01 ) ); \
|
||||
xmm1 = _mm_shuffle_epi8( xmm1, m128_const_64( 0x06090c0f0205080b, \
|
||||
0x0e0104070a0d0003 ) ); \
|
||||
xmm2 = _mm_shuffle_epi8( xmm2, m128_const_64( 0x080b0e0104070a0d, \
|
||||
0x000306090c0f0205 ) ); \
|
||||
xmm3 = _mm_shuffle_epi8( xmm3, m128_const_64( 0x0e0104070a0d0003, \
|
||||
0x06090c0f0205080b ) ); \
|
||||
xmm4 = _mm_shuffle_epi8( xmm4, m128_const_64( 0x0306090c0f020508, \
|
||||
0x0b0e0104070a0d00 ) ); \
|
||||
xmm5 = _mm_shuffle_epi8( xmm5, m128_const_64( 0x05080b0e0104070a, \
|
||||
0x0d000306090c0f02 ) ); \
|
||||
xmm6 = _mm_shuffle_epi8( xmm6, m128_const_64( 0x070a0d000306090c, \
|
||||
0x0f0205080b0e0104 ) ); \
|
||||
xmm7 = _mm_shuffle_epi8( xmm7, m128_const_64( 0x090c0f0205080b0e, \
|
||||
0x0104070a0d000306 ) ); \
|
||||
xmm0 = _mm_shuffle_epi8( xmm0, SUBSH_MASK1 ); \
|
||||
xmm1 = _mm_shuffle_epi8( xmm1, SUBSH_MASK3 ); \
|
||||
xmm2 = _mm_shuffle_epi8( xmm2, SUBSH_MASK5 ); \
|
||||
xmm3 = _mm_shuffle_epi8( xmm3, SUBSH_MASK7 ); \
|
||||
xmm4 = _mm_shuffle_epi8( xmm4, SUBSH_MASK0 ); \
|
||||
xmm5 = _mm_shuffle_epi8( xmm5, SUBSH_MASK2 ); \
|
||||
xmm6 = _mm_shuffle_epi8( xmm6, SUBSH_MASK4 ); \
|
||||
xmm7 = _mm_shuffle_epi8( xmm7, SUBSH_MASK6 ); \
|
||||
/* SubBytes + MixBytes */\
|
||||
SUBMIX( xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, \
|
||||
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 ); \
|
||||
@@ -324,7 +305,7 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
|
||||
* clobbers: t0-t7
|
||||
*/
|
||||
#define Matrix_Transpose(i0, i1, i2, i3, i4, i5, i6, i7, t0, t1, t2, t3, t4, t5, t6, t7){\
|
||||
t0 = m128_const_64( 0x0f070b030e060a02, 0x0d0509010c040800 );\
|
||||
t0 = TRANSP_MASK; \
|
||||
\
|
||||
i6 = _mm_shuffle_epi8(i6, t0);\
|
||||
i0 = _mm_shuffle_epi8(i0, t0);\
|
||||
@@ -412,7 +393,7 @@ static const uint64_t round_const_q[] __attribute__ ((aligned (64))) =
|
||||
i4 = _mm_unpacklo_epi64(i4, i5);\
|
||||
t1 = _mm_unpackhi_epi64(t1, i5);\
|
||||
t2 = i6;\
|
||||
o0 = m128_const_64( 0x0f070b030e060a02, 0x0d0509010c040800 ); \
|
||||
o0 = TRANSP_MASK; \
|
||||
i6 = _mm_unpacklo_epi64(i6, i7);\
|
||||
t2 = _mm_unpackhi_epi64(t2, i7);\
|
||||
/* load transpose mask into a register, because it will be used 8 times */\
|
||||
@@ -653,3 +634,4 @@ void OF1024( __m128i* chaining )
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -11,6 +11,45 @@
|
||||
#include <wmmintrin.h>
|
||||
#include "hash-groestl256.h"
|
||||
|
||||
static const __m128i round_const_l0[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
{ 0x7060504030201000, 0xffffffffffffffff },
|
||||
{ 0x7161514131211101, 0xffffffffffffffff },
|
||||
{ 0x7262524232221202, 0xffffffffffffffff },
|
||||
{ 0x7363534333231303, 0xffffffffffffffff },
|
||||
{ 0x7464544434241404, 0xffffffffffffffff },
|
||||
{ 0x7565554535251505, 0xffffffffffffffff },
|
||||
{ 0x7666564636261606, 0xffffffffffffffff },
|
||||
{ 0x7767574737271707, 0xffffffffffffffff },
|
||||
{ 0x7868584838281808, 0xffffffffffffffff },
|
||||
{ 0x7969594939291909, 0xffffffffffffffff }
|
||||
};
|
||||
|
||||
static const __m128i round_const_l7[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
{ 0x0000000000000000, 0x8f9fafbfcfdfefff },
|
||||
{ 0x0000000000000000, 0x8e9eaebecedeeefe },
|
||||
{ 0x0000000000000000, 0x8d9dadbdcdddedfd },
|
||||
{ 0x0000000000000000, 0x8c9cacbcccdcecfc },
|
||||
{ 0x0000000000000000, 0x8b9babbbcbdbebfb },
|
||||
{ 0x0000000000000000, 0x8a9aaabacadaeafa },
|
||||
{ 0x0000000000000000, 0x8999a9b9c9d9e9f9 },
|
||||
{ 0x0000000000000000, 0x8898a8b8c8d8e8f8 },
|
||||
{ 0x0000000000000000, 0x8797a7b7c7d7e7f7 },
|
||||
{ 0x0000000000000000, 0x8696a6b6c6d6e6f6 }
|
||||
};
|
||||
|
||||
static const __m128i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02 };
|
||||
|
||||
static const __m128i SUBSH_MASK0 = { 0x0c0f0104070b0e00, 0x03060a0d08020509 };
|
||||
static const __m128i SUBSH_MASK1 = { 0x0e090205000d0801, 0x04070c0f0a03060b };
|
||||
static const __m128i SUBSH_MASK2 = { 0x080b0306010f0a02, 0x05000e090c04070d };
|
||||
static const __m128i SUBSH_MASK3 = { 0x0a0d040702090c03, 0x0601080b0e05000f };
|
||||
static const __m128i SUBSH_MASK4 = { 0x0b0e0500030a0d04, 0x0702090c0f060108 };
|
||||
static const __m128i SUBSH_MASK5 = { 0x0d080601040c0f05, 0x00030b0e0907020a };
|
||||
static const __m128i SUBSH_MASK6 = { 0x0f0a0702050e0906, 0x01040d080b00030c };
|
||||
static const __m128i SUBSH_MASK7 = { 0x090c000306080b07, 0x02050f0a0d01040e };
|
||||
|
||||
#define tos(a) #a
|
||||
#define tostr(a) tos(a)
|
||||
|
||||
@@ -26,8 +65,6 @@
|
||||
i = _mm_xor_si128(i, j);\
|
||||
}
|
||||
|
||||
/**/
|
||||
|
||||
/* Yet another implementation of MixBytes.
|
||||
This time we use the formulae (3) from the paper "Byte Slicing Groestl".
|
||||
Input: a0, ..., a7
|
||||
@@ -141,36 +178,6 @@
|
||||
b1 = _mm_xor_si128(b1, a4);\
|
||||
}/*MixBytes*/
|
||||
|
||||
|
||||
static const uint64_t round_const_l0[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
0x7060504030201000, 0xffffffffffffffff,
|
||||
0x7161514131211101, 0xffffffffffffffff,
|
||||
0x7262524232221202, 0xffffffffffffffff,
|
||||
0x7363534333231303, 0xffffffffffffffff,
|
||||
0x7464544434241404, 0xffffffffffffffff,
|
||||
0x7565554535251505, 0xffffffffffffffff,
|
||||
0x7666564636261606, 0xffffffffffffffff,
|
||||
0x7767574737271707, 0xffffffffffffffff,
|
||||
0x7868584838281808, 0xffffffffffffffff,
|
||||
0x7969594939291909, 0xffffffffffffffff
|
||||
};
|
||||
|
||||
static const uint64_t round_const_l7[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
0x0000000000000000, 0x8f9fafbfcfdfefff,
|
||||
0x0000000000000000, 0x8e9eaebecedeeefe,
|
||||
0x0000000000000000, 0x8d9dadbdcdddedfd,
|
||||
0x0000000000000000, 0x8c9cacbcccdcecfc,
|
||||
0x0000000000000000, 0x8b9babbbcbdbebfb,
|
||||
0x0000000000000000, 0x8a9aaabacadaeafa,
|
||||
0x0000000000000000, 0x8999a9b9c9d9e9f9,
|
||||
0x0000000000000000, 0x8898a8b8c8d8e8f8,
|
||||
0x0000000000000000, 0x8797a7b7c7d7e7f7,
|
||||
0x0000000000000000, 0x8696a6b6c6d6e6f6
|
||||
};
|
||||
|
||||
|
||||
/* one round
|
||||
* i = round number
|
||||
* a0-a7 = input rows
|
||||
@@ -190,29 +197,21 @@ static const uint64_t round_const_l7[] __attribute__ ((aligned (64))) =
|
||||
\
|
||||
/* ShiftBytes + SubBytes (interleaved) */\
|
||||
b0 = _mm_xor_si128(b0, b0);\
|
||||
a0 = _mm_shuffle_epi8( a0, m128_const_64( 0x03060a0d08020509, \
|
||||
0x0c0f0104070b0e00 ) ); \
|
||||
a0 = _mm_shuffle_epi8( a0, SUBSH_MASK0 ); \
|
||||
a0 = _mm_aesenclast_si128( a0, b0 );\
|
||||
a1 = _mm_shuffle_epi8( a1, m128_const_64( 0x04070c0f0a03060b, \
|
||||
0x0e090205000d0801 ) ); \
|
||||
a1 = _mm_shuffle_epi8( a1, SUBSH_MASK1 ); \
|
||||
a1 = _mm_aesenclast_si128( a1, b0 );\
|
||||
a2 = _mm_shuffle_epi8( a2, m128_const_64( 0x05000e090c04070d, \
|
||||
0x080b0306010f0a02 ) ); \
|
||||
a2 = _mm_shuffle_epi8( a2, SUBSH_MASK2 ); \
|
||||
a2 = _mm_aesenclast_si128( a2, b0 );\
|
||||
a3 = _mm_shuffle_epi8( a3, m128_const_64( 0x0601080b0e05000f, \
|
||||
0x0a0d040702090c03 ) ); \
|
||||
a3 = _mm_shuffle_epi8( a3, SUBSH_MASK3 ); \
|
||||
a3 = _mm_aesenclast_si128( a3, b0 );\
|
||||
a4 = _mm_shuffle_epi8( a4, m128_const_64( 0x0702090c0f060108, \
|
||||
0x0b0e0500030a0d04 ) ); \
|
||||
a4 = _mm_shuffle_epi8( a4, SUBSH_MASK4 ); \
|
||||
a4 = _mm_aesenclast_si128( a4, b0 );\
|
||||
a5 = _mm_shuffle_epi8( a5, m128_const_64( 0x00030b0e0907020a, \
|
||||
0x0d080601040c0f05 ) ); \
|
||||
a5 = _mm_shuffle_epi8( a5, SUBSH_MASK5 ); \
|
||||
a5 = _mm_aesenclast_si128( a5, b0 );\
|
||||
a6 = _mm_shuffle_epi8( a6, m128_const_64( 0x01040d080b00030c, \
|
||||
0x0f0a0702050e0906 ) ); \
|
||||
a6 = _mm_shuffle_epi8( a6, SUBSH_MASK6 ); \
|
||||
a6 = _mm_aesenclast_si128( a6, b0 );\
|
||||
a7 = _mm_shuffle_epi8( a7, m128_const_64( 0x02050f0a0d01040e, \
|
||||
0x090c000306080b07 ) ); \
|
||||
a7 = _mm_shuffle_epi8( a7, SUBSH_MASK7 ); \
|
||||
a7 = _mm_aesenclast_si128( a7, b0 );\
|
||||
\
|
||||
/* MixBytes */\
|
||||
@@ -241,8 +240,9 @@ static const uint64_t round_const_l7[] __attribute__ ((aligned (64))) =
|
||||
* outputs: i0, o1-o3
|
||||
* clobbers: t0
|
||||
*/
|
||||
|
||||
#define Matrix_Transpose_A(i0, i1, i2, i3, o1, o2, o3, t0){\
|
||||
t0 = m128_const_64( 0x0f070b030e060a02, 0x0d0509010c040800 ); \
|
||||
t0 = TRANSP_MASK; \
|
||||
\
|
||||
i0 = _mm_shuffle_epi8(i0, t0);\
|
||||
i1 = _mm_shuffle_epi8(i1, t0);\
|
||||
|
@@ -214,6 +214,98 @@ HashReturn_gr update_and_final_groestl256( hashState_groestl256* ctx,
|
||||
return SUCCESS_GR;
|
||||
}
|
||||
|
||||
int groestl256_full( hashState_groestl256* ctx,
|
||||
void* output, const void* input, DataLength_gr databitlen )
|
||||
{
|
||||
int i;
|
||||
ctx->hashlen = 32;
|
||||
for ( i = 0; i < SIZE256; i++ )
|
||||
{
|
||||
ctx->chaining[i] = _mm_setzero_si128();
|
||||
ctx->buffer[i] = _mm_setzero_si128();
|
||||
}
|
||||
((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH);
|
||||
INIT256( ctx->chaining );
|
||||
ctx->buf_ptr = 0;
|
||||
ctx->rem_ptr = 0;
|
||||
|
||||
const int len = (int)databitlen / 128;
|
||||
const int hashlen_m128i = ctx->hashlen / 16; // bytes to __m128i
|
||||
const int hash_offset = SIZE256 - hashlen_m128i;
|
||||
int rem = ctx->rem_ptr;
|
||||
int blocks = len / SIZE256;
|
||||
__m128i* in = (__m128i*)input;
|
||||
|
||||
// --- update ---
|
||||
|
||||
// digest any full blocks, process directly from input
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
TF512( ctx->chaining, &in[ i * SIZE256 ] );
|
||||
ctx->buf_ptr = blocks * SIZE256;
|
||||
|
||||
// cryptonight has 200 byte input, an odd number of __m128i
|
||||
// remainder is only 8 bytes, ie u64.
|
||||
if ( databitlen % 128 !=0 )
|
||||
{
|
||||
// must be cryptonight, copy 64 bits of data
|
||||
*(uint64_t*)(ctx->buffer) = *(uint64_t*)(&in[ ctx->buf_ptr ] );
|
||||
i = -1; // signal for odd length
|
||||
}
|
||||
else
|
||||
{
|
||||
// Copy any remaining data to buffer for final transform
|
||||
for ( i = 0; i < len % SIZE256; i++ )
|
||||
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
|
||||
i += rem; // use i as rem_ptr in final
|
||||
}
|
||||
|
||||
//--- final ---
|
||||
|
||||
// adjust for final block
|
||||
blocks++;
|
||||
|
||||
if ( i == len - 1 )
|
||||
{
|
||||
// all padding at once
|
||||
ctx->buffer[i] = _mm_set_epi8( blocks,blocks>>8,0,0, 0,0,0,0,
|
||||
0, 0,0,0, 0,0,0,0x80 );
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( i == -1 )
|
||||
{
|
||||
// cryptonight odd length
|
||||
((uint64_t*)ctx->buffer)[ 1 ] = 0x80ull;
|
||||
// finish the block with zero and length padding as normal
|
||||
i = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// add first padding
|
||||
ctx->buffer[i] = _mm_set_epi8( 0,0,0,0, 0,0,0,0,
|
||||
0,0,0,0, 0,0,0,0x80 );
|
||||
}
|
||||
// add zero padding
|
||||
for ( i += 1; i < SIZE256 - 1; i++ )
|
||||
ctx->buffer[i] = _mm_setzero_si128();
|
||||
// add length padding
|
||||
// cheat since we know the block count is trivial, good if block < 256
|
||||
ctx->buffer[i] = _mm_set_epi8( blocks,blocks>>8,0,0, 0,0,0,0,
|
||||
0, 0,0,0, 0,0,0,0 );
|
||||
}
|
||||
|
||||
// digest final padding block and do output transform
|
||||
TF512( ctx->chaining, ctx->buffer );
|
||||
OF512( ctx->chaining );
|
||||
|
||||
// store hash result in output
|
||||
for ( i = 0; i < hashlen_m128i; i++ )
|
||||
casti_m128i( output, i ) = ctx->chaining[ hash_offset + i ];
|
||||
|
||||
return SUCCESS_GR;
|
||||
}
|
||||
|
||||
|
||||
/* hash bit sequence */
|
||||
HashReturn_gr hash_groestl256(int hashbitlen,
|
||||
const BitSequence_gr* data,
|
||||
|
@@ -115,4 +115,7 @@ HashReturn_gr hash_groestli256( int, const BitSequence_gr*, DataLength_gr,
|
||||
HashReturn_gr update_and_final_groestl256( hashState_groestl256*, void*,
|
||||
const void*, DataLength_gr );
|
||||
|
||||
int groestl256_full( hashState_groestl256* ctx,
|
||||
void* output, const void* input, DataLength_gr databitlen );
|
||||
|
||||
#endif /* __hash_h */
|
||||
|
@@ -53,7 +53,7 @@ int scanhash_groestl_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( hash+(lane<<3), ptarget) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
||||
submit_solution( work, hash+(lane<<3), mythr );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "groestl-gate.h"
|
||||
|
||||
#if !defined(GROESTL_8WAY) && !defined(GROESTLX16R_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
@@ -88,4 +91,4 @@ int scanhash_groestl( struct work *work, uint32_t max_nonce,
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -23,7 +23,6 @@ int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen )
|
||||
int i;
|
||||
|
||||
ctx->hashlen = hashlen;
|
||||
SET_CONSTANTS();
|
||||
|
||||
if (ctx->chaining == NULL || ctx->buffer == NULL)
|
||||
return 1;
|
||||
@@ -36,9 +35,6 @@ int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen )
|
||||
|
||||
// The only non-zero in the IV is len. It can be hard coded.
|
||||
ctx->chaining[ 3 ] = m512_const2_64( 0, 0x0100000000000000 );
|
||||
// uint64_t len = U64BIG((uint64_t)LENGTH);
|
||||
// ctx->chaining[ COLS/2 -1 ] = _mm512_set4_epi64( len, 0, len, 0 );
|
||||
// INIT256_4way(ctx->chaining);
|
||||
|
||||
ctx->buf_ptr = 0;
|
||||
ctx->rem_ptr = 0;
|
||||
@@ -46,6 +42,77 @@ int groestl256_4way_init( groestl256_4way_context* ctx, uint64_t hashlen )
|
||||
return 0;
|
||||
}
|
||||
|
||||
int groestl256_4way_full( groestl256_4way_context* ctx, void* output,
|
||||
const void* input, uint64_t databitlen )
|
||||
{
|
||||
const int len = (int)databitlen / 128;
|
||||
const int hashlen_m128i = 32 / 16; // bytes to __m128i
|
||||
const int hash_offset = SIZE256 - hashlen_m128i;
|
||||
int rem = ctx->rem_ptr;
|
||||
int blocks = len / SIZE256;
|
||||
__m512i* in = (__m512i*)input;
|
||||
int i;
|
||||
|
||||
if (ctx->chaining == NULL || ctx->buffer == NULL)
|
||||
return 1;
|
||||
|
||||
for ( i = 0; i < SIZE256; i++ )
|
||||
{
|
||||
ctx->chaining[i] = m512_zero;
|
||||
ctx->buffer[i] = m512_zero;
|
||||
}
|
||||
|
||||
// The only non-zero in the IV is len. It can be hard coded.
|
||||
ctx->chaining[ 3 ] = m512_const2_64( 0, 0x0100000000000000 );
|
||||
ctx->buf_ptr = 0;
|
||||
ctx->rem_ptr = 0;
|
||||
|
||||
// --- update ---
|
||||
|
||||
// digest any full blocks, process directly from input
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
TF512_4way( ctx->chaining, &in[ i * SIZE256 ] );
|
||||
ctx->buf_ptr = blocks * SIZE256;
|
||||
|
||||
// copy any remaining data to buffer, it may already contain data
|
||||
// from a previous update for a midstate precalc
|
||||
for ( i = 0; i < len % SIZE256; i++ )
|
||||
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
|
||||
i += rem; // use i as rem_ptr in final
|
||||
|
||||
//--- final ---
|
||||
|
||||
blocks++; // adjust for final block
|
||||
|
||||
if ( i == SIZE256 - 1 )
|
||||
{
|
||||
// only 1 vector left in buffer, all padding at once
|
||||
ctx->buffer[i] = m512_const2_64( (uint64_t)blocks << 56, 0x80 );
|
||||
}
|
||||
else
|
||||
{
|
||||
// add first padding
|
||||
ctx->buffer[i] = m512_const4_64( 0, 0x80, 0, 0x80 );
|
||||
// add zero padding
|
||||
for ( i += 1; i < SIZE256 - 1; i++ )
|
||||
ctx->buffer[i] = m512_zero;
|
||||
|
||||
// add length padding, second last byte is zero unless blocks > 255
|
||||
ctx->buffer[i] = m512_const2_64( (uint64_t)blocks << 56, 0 );
|
||||
}
|
||||
|
||||
// digest final padding block and do output transform
|
||||
TF512_4way( ctx->chaining, ctx->buffer );
|
||||
|
||||
OF512_4way( ctx->chaining );
|
||||
|
||||
// store hash result in output
|
||||
for ( i = 0; i < hashlen_m128i; i++ )
|
||||
casti_m512i( output, i ) = ctx->chaining[ hash_offset + i ];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int groestl256_4way_update_close( groestl256_4way_context* ctx, void* output,
|
||||
const void* input, uint64_t databitlen )
|
||||
{
|
||||
@@ -75,11 +142,11 @@ int groestl256_4way_update_close( groestl256_4way_context* ctx, void* output,
|
||||
blocks++; // adjust for final block
|
||||
|
||||
if ( i == SIZE256 - 1 )
|
||||
{
|
||||
{
|
||||
// only 1 vector left in buffer, all padding at once
|
||||
ctx->buffer[i] = m512_const1_128( _mm_set_epi8(
|
||||
blocks, blocks>>8,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// add first padding
|
||||
|
@@ -71,5 +71,8 @@ int groestl256_4way_init( groestl256_4way_context*, uint64_t );
|
||||
int groestl256_4way_update_close( groestl256_4way_context*, void*,
|
||||
const void*, uint64_t );
|
||||
|
||||
int groestl256_4way_full( groestl256_4way_context*, void*,
|
||||
const void*, uint64_t );
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -14,17 +14,78 @@
|
||||
#include "groestl256-hash-4way.h"
|
||||
|
||||
#if defined(__VAES__)
|
||||
static const __m128i round_const_l0[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
{ 0x7060504030201000, 0xffffffffffffffff },
|
||||
{ 0x7161514131211101, 0xffffffffffffffff },
|
||||
{ 0x7262524232221202, 0xffffffffffffffff },
|
||||
{ 0x7363534333231303, 0xffffffffffffffff },
|
||||
{ 0x7464544434241404, 0xffffffffffffffff },
|
||||
{ 0x7565554535251505, 0xffffffffffffffff },
|
||||
{ 0x7666564636261606, 0xffffffffffffffff },
|
||||
{ 0x7767574737271707, 0xffffffffffffffff },
|
||||
{ 0x7868584838281808, 0xffffffffffffffff },
|
||||
{ 0x7969594939291909, 0xffffffffffffffff }
|
||||
};
|
||||
|
||||
/* global constants */
|
||||
__m512i ROUND_CONST_Lx;
|
||||
__m512i ROUND_CONST_L0[ROUNDS512];
|
||||
__m512i ROUND_CONST_L7[ROUNDS512];
|
||||
//__m512i ROUND_CONST_P[ROUNDS1024];
|
||||
//__m512i ROUND_CONST_Q[ROUNDS1024];
|
||||
__m512i TRANSP_MASK;
|
||||
__m512i SUBSH_MASK[8];
|
||||
__m512i ALL_1B;
|
||||
__m512i ALL_FF;
|
||||
static const __m128i round_const_l7[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
{ 0x0000000000000000, 0x8f9fafbfcfdfefff },
|
||||
{ 0x0000000000000000, 0x8e9eaebecedeeefe },
|
||||
{ 0x0000000000000000, 0x8d9dadbdcdddedfd },
|
||||
{ 0x0000000000000000, 0x8c9cacbcccdcecfc },
|
||||
{ 0x0000000000000000, 0x8b9babbbcbdbebfb },
|
||||
{ 0x0000000000000000, 0x8a9aaabacadaeafa },
|
||||
{ 0x0000000000000000, 0x8999a9b9c9d9e9f9 },
|
||||
{ 0x0000000000000000, 0x8898a8b8c8d8e8f8 },
|
||||
{ 0x0000000000000000, 0x8797a7b7c7d7e7f7 },
|
||||
{ 0x0000000000000000, 0x8696a6b6c6d6e6f6 }
|
||||
};
|
||||
|
||||
static const __m512i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02,
|
||||
0x1d1519111c141810, 0x1f171b131e161a12,
|
||||
0x2d2529212c242820, 0x2f272b232e262a22,
|
||||
0x3d3539313c343830, 0x3f373b333e363a32 };
|
||||
|
||||
static const __m512i SUBSH_MASK0 = { 0x0c0f0104070b0e00, 0x03060a0d08020509,
|
||||
0x1c1f1114171b1e10, 0x13161a1d18121519,
|
||||
0x2c2f2124272b2e20, 0x23262a2d28222529,
|
||||
0x3c3f3134373b3e30, 0x33363a3d38323539 };
|
||||
|
||||
static const __m512i SUBSH_MASK1 = { 0x0e090205000d0801, 0x04070c0f0a03060b,
|
||||
0x1e191215101d1801, 0x14171c1f1a13161b,
|
||||
0x2e292225202d2821, 0x24272c2f2a23262b,
|
||||
0x3e393235303d3831, 0x34373c3f3a33363b };
|
||||
|
||||
static const __m512i SUBSH_MASK2 = { 0x080b0306010f0a02, 0x05000e090c04070d,
|
||||
0x181b1316111f1a12, 0x15101e191c14171d,
|
||||
0x282b2326212f2a22, 0x25202e292c24272d,
|
||||
0x383b3336313f3a32, 0x35303e393c34373d };
|
||||
|
||||
static const __m512i SUBSH_MASK3 = { 0x0a0d040702090c03, 0x0601080b0e05000f,
|
||||
0x1a1d141712191c13, 0x1611181b1e15101f,
|
||||
0x2a2d242722292c23, 0x2621282b2e25202f,
|
||||
0x3a3d343732393c33, 0x3631383b3e35303f };
|
||||
|
||||
static const __m512i SUBSH_MASK4 = { 0x0b0e0500030a0d04, 0x0702090c0f060108,
|
||||
0x1b1e1510131a1d14, 0x1712191c1f161118,
|
||||
0x2b2e2520232a2d24, 0x2722292c2f262128,
|
||||
0x3b3e3530333a3d34, 0x3732393c3f363138 };
|
||||
|
||||
static const __m512i SUBSH_MASK5 = { 0x0d080601040c0f05, 0x00030b0e0907020a,
|
||||
0x1d181611141c1f15, 0x10131b1e1917121a,
|
||||
0x2d282621242c2f25, 0x20232b2e2927222a,
|
||||
0x3d383631343c3f35, 0x30333b3e3937323a };
|
||||
|
||||
static const __m512i SUBSH_MASK6 = { 0x0f0a0702050e0906, 0x01040d080b00030c,
|
||||
0x1f1a1712151e1916, 0x11141d181b10131c,
|
||||
0x2f2a2722252e2926, 0x21242d282b20232c,
|
||||
0x3f3a3732353e3936, 0x31343d383b30333c };
|
||||
|
||||
static const __m512i SUBSH_MASK7 = { 0x090c000306080b07, 0x02050f0a0d01040e,
|
||||
0x191c101316181b17, 0x12151f1a1d11141e,
|
||||
0x292c202326282b27, 0x22252f2a2d21242e,
|
||||
0x393c303336383b37, 0x32353f3a3d31343e };
|
||||
|
||||
#define tos(a) #a
|
||||
#define tostr(a) tos(a)
|
||||
@@ -40,8 +101,6 @@ __m512i ALL_FF;
|
||||
i = _mm512_xor_si512(i, j);\
|
||||
}
|
||||
|
||||
/**/
|
||||
|
||||
/* Yet another implementation of MixBytes.
|
||||
This time we use the formulae (3) from the paper "Byte Slicing Groestl".
|
||||
Input: a0, ..., a7
|
||||
@@ -155,95 +214,36 @@ __m512i ALL_FF;
|
||||
b1 = _mm512_xor_si512(b1, a4);\
|
||||
}/*MixBytes*/
|
||||
|
||||
// calculate the round constants seperately and load at startup
|
||||
|
||||
#define SET_CONSTANTS(){\
|
||||
ALL_1B = _mm512_set1_epi32( 0x1b1b1b1b );\
|
||||
TRANSP_MASK = _mm512_set_epi32( \
|
||||
0x3f373b33, 0x3e363a32, 0x3d353931, 0x3c343830, \
|
||||
0x2f272b23, 0x2e262a22, 0x2d252921, 0x2c242820, \
|
||||
0x1f171b13, 0x1e161a12, 0x1d151911, 0x1c141810, \
|
||||
0x0f070b03, 0x0e060a02, 0x0d050901, 0x0c040800 ); \
|
||||
SUBSH_MASK[0] = _mm512_set_epi32( \
|
||||
0x33363a3d, 0x38323539, 0x3c3f3134, 0x373b3e30, \
|
||||
0x23262a2d, 0x28222529, 0x2c2f2124, 0x272b2e20, \
|
||||
0x13161a1d, 0x18121519, 0x1c1f1114, 0x171b1e10, \
|
||||
0x03060a0d, 0x08020509, 0x0c0f0104, 0x070b0e00 ); \
|
||||
SUBSH_MASK[1] = _mm512_set_epi32( \
|
||||
0x34373c3f, 0x3a33363b, 0x3e393235, 0x303d3831, \
|
||||
0x24272c2f, 0x2a23262b, 0x2e292225, 0x202d2821, \
|
||||
0x14171c1f, 0x1a13161b, 0x1e191215, 0x101d1801, \
|
||||
0x04070c0f, 0x0a03060b, 0x0e090205, 0x000d0801 );\
|
||||
SUBSH_MASK[2] = _mm512_set_epi32( \
|
||||
0x35303e39, 0x3c34373d, 0x383b3336, 0x313f3a32, \
|
||||
0x25202e29, 0x2c24272d, 0x282b2326, 0x212f2a22, \
|
||||
0x15101e19, 0x1c14171d, 0x181b1316, 0x111f1a12, \
|
||||
0x05000e09, 0x0c04070d, 0x080b0306, 0x010f0a02 );\
|
||||
SUBSH_MASK[3] = _mm512_set_epi32( \
|
||||
0x3631383b, 0x3e35303f, 0x3a3d3437, 0x32393c33, \
|
||||
0x2621282b, 0x2e25202f, 0x2a2d2427, 0x22292c23, \
|
||||
0x1611181b, 0x1e15101f, 0x1a1d1417, 0x12191c13, \
|
||||
0x0601080b, 0x0e05000f, 0x0a0d0407, 0x02090c03 );\
|
||||
SUBSH_MASK[4] = _mm512_set_epi32( \
|
||||
0x3732393c, 0x3f363138, 0x3b3e3530, 0x333a3d34, \
|
||||
0x2722292c, 0x2f262128, 0x2b2e2520, 0x232a2d24, \
|
||||
0x1712191c, 0x1f161118, 0x1b1e1510, 0x131a1d14, \
|
||||
0x0702090c, 0x0f060108, 0x0b0e0500, 0x030a0d04 );\
|
||||
SUBSH_MASK[5] = _mm512_set_epi32( \
|
||||
0x30333b3e, 0x3937323a, 0x3d383631, 0x343c3f35, \
|
||||
0x20232b2e, 0x2927222a, 0x2d282621, 0x242c2f25, \
|
||||
0x10131b1e, 0x1917121a, 0x1d181611, 0x141c1f15, \
|
||||
0x00030b0e, 0x0907020a, 0x0d080601, 0x040c0f05 );\
|
||||
SUBSH_MASK[6] = _mm512_set_epi32( \
|
||||
0x31343d38, 0x3b30333c, 0x3f3a3732, 0x353e3936, \
|
||||
0x21242d28, 0x2b20232c, 0x2f2a2722, 0x252e2926, \
|
||||
0x11141d18, 0x1b10131c, 0x1f1a1712, 0x151e1916, \
|
||||
0x01040d08, 0x0b00030c, 0x0f0a0702, 0x050e0906 );\
|
||||
SUBSH_MASK[7] = _mm512_set_epi32( \
|
||||
0x32353f3a, 0x3d31343e, 0x393c3033, 0x36383b37, \
|
||||
0x22252f2a, 0x2d21242e, 0x292c2023, 0x26282b27, \
|
||||
0x12151f1a, 0x1d11141e, 0x191c1013, 0x16181b17, \
|
||||
0x02050f0a, 0x0d01040e, 0x090c0003, 0x06080b07 );\
|
||||
for ( i = 0; i < ROUNDS512; i++ ) \
|
||||
{\
|
||||
ROUND_CONST_L0[i] = _mm512_set4_epi32( 0xffffffff, 0xffffffff, \
|
||||
0x70605040 ^ ( i * 0x01010101 ), 0x30201000 ^ ( i * 0x01010101 ) ); \
|
||||
ROUND_CONST_L7[i] = _mm512_set4_epi32( 0x8f9fafbf ^ ( i * 0x01010101 ), \
|
||||
0xcfdfefff ^ ( i * 0x01010101 ), 0x00000000, 0x00000000 ); \
|
||||
}\
|
||||
ROUND_CONST_Lx = _mm512_set4_epi32( 0xffffffff, 0xffffffff, \
|
||||
0x00000000, 0x00000000 ); \
|
||||
}while(0);\
|
||||
|
||||
#define ROUND(i, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
|
||||
/* AddRoundConstant */\
|
||||
b1 = ROUND_CONST_Lx;\
|
||||
a0 = _mm512_xor_si512( a0, (ROUND_CONST_L0[i]) );\
|
||||
b1 = m512_const2_64( 0xffffffffffffffff, 0 ); \
|
||||
a0 = _mm512_xor_si512( a0, m512_const1_128( round_const_l0[i] ) );\
|
||||
a1 = _mm512_xor_si512( a1, b1 );\
|
||||
a2 = _mm512_xor_si512( a2, b1 );\
|
||||
a3 = _mm512_xor_si512( a3, b1 );\
|
||||
a4 = _mm512_xor_si512( a4, b1 );\
|
||||
a5 = _mm512_xor_si512( a5, b1 );\
|
||||
a6 = _mm512_xor_si512( a6, b1 );\
|
||||
a7 = _mm512_xor_si512( a7, (ROUND_CONST_L7[i]) );\
|
||||
a7 = _mm512_xor_si512( a7, m512_const1_128( round_const_l7[i] ) );\
|
||||
\
|
||||
/* ShiftBytes + SubBytes (interleaved) */\
|
||||
b0 = _mm512_xor_si512( b0, b0 );\
|
||||
a0 = _mm512_shuffle_epi8( a0, (SUBSH_MASK[0]) );\
|
||||
a0 = _mm512_shuffle_epi8( a0, SUBSH_MASK0 );\
|
||||
a0 = _mm512_aesenclast_epi128(a0, b0 );\
|
||||
a1 = _mm512_shuffle_epi8( a1, (SUBSH_MASK[1]) );\
|
||||
a1 = _mm512_shuffle_epi8( a1, SUBSH_MASK1 );\
|
||||
a1 = _mm512_aesenclast_epi128(a1, b0 );\
|
||||
a2 = _mm512_shuffle_epi8( a2, (SUBSH_MASK[2]) );\
|
||||
a2 = _mm512_shuffle_epi8( a2, SUBSH_MASK2 );\
|
||||
a2 = _mm512_aesenclast_epi128(a2, b0 );\
|
||||
a3 = _mm512_shuffle_epi8( a3, (SUBSH_MASK[3]) );\
|
||||
a3 = _mm512_shuffle_epi8( a3, SUBSH_MASK3 );\
|
||||
a3 = _mm512_aesenclast_epi128(a3, b0 );\
|
||||
a4 = _mm512_shuffle_epi8( a4, (SUBSH_MASK[4]) );\
|
||||
a4 = _mm512_shuffle_epi8( a4, SUBSH_MASK4 );\
|
||||
a4 = _mm512_aesenclast_epi128(a4, b0 );\
|
||||
a5 = _mm512_shuffle_epi8( a5, (SUBSH_MASK[5]) );\
|
||||
a5 = _mm512_shuffle_epi8( a5, SUBSH_MASK5 );\
|
||||
a5 = _mm512_aesenclast_epi128(a5, b0 );\
|
||||
a6 = _mm512_shuffle_epi8( a6, (SUBSH_MASK[6]) );\
|
||||
a6 = _mm512_shuffle_epi8( a6, SUBSH_MASK6 );\
|
||||
a6 = _mm512_aesenclast_epi128(a6, b0 );\
|
||||
a7 = _mm512_shuffle_epi8( a7, (SUBSH_MASK[7]) );\
|
||||
a7 = _mm512_shuffle_epi8( a7, SUBSH_MASK7 );\
|
||||
a7 = _mm512_aesenclast_epi128( a7, b0 );\
|
||||
\
|
||||
/* MixBytes */\
|
||||
@@ -390,29 +390,6 @@ __m512i ALL_FF;
|
||||
}/**/
|
||||
|
||||
|
||||
|
||||
void INIT256_4way( __m512i* chaining )
|
||||
{
|
||||
static __m512i xmm0, xmm2, xmm6, xmm7;
|
||||
static __m512i xmm12, xmm13, xmm14, xmm15;
|
||||
|
||||
/* load IV into registers xmm12 - xmm15 */
|
||||
xmm12 = chaining[0];
|
||||
xmm13 = chaining[1];
|
||||
xmm14 = chaining[2];
|
||||
xmm15 = chaining[3];
|
||||
|
||||
/* transform chaining value from column ordering into row ordering */
|
||||
/* we put two rows (64 bit) of the IV into one 128-bit XMM register */
|
||||
Matrix_Transpose_A(xmm12, xmm13, xmm14, xmm15, xmm2, xmm6, xmm7, xmm0);
|
||||
|
||||
/* store transposed IV */
|
||||
chaining[0] = xmm12;
|
||||
chaining[1] = xmm2;
|
||||
chaining[2] = xmm6;
|
||||
chaining[3] = xmm7;
|
||||
}
|
||||
|
||||
void TF512_4way( __m512i* chaining, __m512i* message )
|
||||
{
|
||||
static __m512i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
||||
|
@@ -19,10 +19,6 @@
|
||||
|
||||
int groestl512_4way_init( groestl512_4way_context* ctx, uint64_t hashlen )
|
||||
{
|
||||
int i;
|
||||
|
||||
SET_CONSTANTS();
|
||||
|
||||
if (ctx->chaining == NULL || ctx->buffer == NULL)
|
||||
return 1;
|
||||
|
||||
@@ -99,7 +95,6 @@ int groestl512_4way_full( groestl512_4way_context* ctx, void* output,
|
||||
|
||||
// --- init ---
|
||||
|
||||
SET_CONSTANTS();
|
||||
memset_zero_512( ctx->chaining, SIZE512 );
|
||||
memset_zero_512( ctx->buffer, SIZE512 );
|
||||
ctx->chaining[ 6 ] = m512_const2_64( 0x0200000000000000, 0 );
|
||||
|
@@ -15,16 +15,86 @@
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
/* global constants */
|
||||
__m512i ROUND_CONST_Lx;
|
||||
//__m128i ROUND_CONST_L0[ROUNDS512];
|
||||
//__m128i ROUND_CONST_L7[ROUNDS512];
|
||||
__m512i ROUND_CONST_P[ROUNDS1024];
|
||||
__m512i ROUND_CONST_Q[ROUNDS1024];
|
||||
__m512i TRANSP_MASK;
|
||||
__m512i SUBSH_MASK[8];
|
||||
__m512i ALL_1B;
|
||||
__m512i ALL_FF;
|
||||
static const __m128i round_const_p[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
{ 0x7060504030201000, 0xf0e0d0c0b0a09080 },
|
||||
{ 0x7161514131211101, 0xf1e1d1c1b1a19181 },
|
||||
{ 0x7262524232221202, 0xf2e2d2c2b2a29282 },
|
||||
{ 0x7363534333231303, 0xf3e3d3c3b3a39383 },
|
||||
{ 0x7464544434241404, 0xf4e4d4c4b4a49484 },
|
||||
{ 0x7565554535251505, 0xf5e5d5c5b5a59585 },
|
||||
{ 0x7666564636261606, 0xf6e6d6c6b6a69686 },
|
||||
{ 0x7767574737271707, 0xf7e7d7c7b7a79787 },
|
||||
{ 0x7868584838281808, 0xf8e8d8c8b8a89888 },
|
||||
{ 0x7969594939291909, 0xf9e9d9c9b9a99989 },
|
||||
{ 0x7a6a5a4a3a2a1a0a, 0xfaeadacabaaa9a8a },
|
||||
{ 0x7b6b5b4b3b2b1b0b, 0xfbebdbcbbbab9b8b },
|
||||
{ 0x7c6c5c4c3c2c1c0c, 0xfcecdcccbcac9c8c },
|
||||
{ 0x7d6d5d4d3d2d1d0d, 0xfdedddcdbdad9d8d }
|
||||
};
|
||||
|
||||
static const __m128i round_const_q[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
{ 0x8f9fafbfcfdfefff, 0x0f1f2f3f4f5f6f7f },
|
||||
{ 0x8e9eaebecedeeefe, 0x0e1e2e3e4e5e6e7e },
|
||||
{ 0x8d9dadbdcdddedfd, 0x0d1d2d3d4d5d6d7d },
|
||||
{ 0x8c9cacbcccdcecfc, 0x0c1c2c3c4c5c6c7c },
|
||||
{ 0x8b9babbbcbdbebfb, 0x0b1b2b3b4b5b6b7b },
|
||||
{ 0x8a9aaabacadaeafa, 0x0a1a2a3a4a5a6a7a },
|
||||
{ 0x8999a9b9c9d9e9f9, 0x0919293949596979 },
|
||||
{ 0x8898a8b8c8d8e8f8, 0x0818283848586878 },
|
||||
{ 0x8797a7b7c7d7e7f7, 0x0717273747576777 },
|
||||
{ 0x8696a6b6c6d6e6f6, 0x0616263646566676 },
|
||||
{ 0x8595a5b5c5d5e5f5, 0x0515253545556575 },
|
||||
{ 0x8494a4b4c4d4e4f4, 0x0414243444546474 },
|
||||
{ 0x8393a3b3c3d3e3f3, 0x0313233343536373 },
|
||||
{ 0x8292a2b2c2d2e2f2, 0x0212223242526272 }
|
||||
};
|
||||
|
||||
static const __m512i TRANSP_MASK = { 0x0d0509010c040800, 0x0f070b030e060a02,
|
||||
0x1d1519111c141810, 0x1f171b131e161a12,
|
||||
0x2d2529212c242820, 0x2f272b232e262a22,
|
||||
0x3d3539313c343830, 0x3f373b333e363a32 };
|
||||
|
||||
static const __m512i SUBSH_MASK0 = { 0x0b0e0104070a0d00, 0x0306090c0f020508,
|
||||
0x1b1e1114171a1d10, 0x1316191c1f121518,
|
||||
0x2b2e2124272a2d20, 0x2326292c2f222528,
|
||||
0x3b3e3134373a3d30, 0x3336393c3f323538 };
|
||||
|
||||
static const __m512i SUBSH_MASK1 = { 0x0c0f0205080b0e01, 0x04070a0d00030609,
|
||||
0x1c1f1215181b1e11, 0x14171a1d10131619,
|
||||
0x2c2f2225282b2e21, 0x24272a2d20232629,
|
||||
0x3c3f3235383b3e31, 0x34373a3d30333639 };
|
||||
|
||||
static const __m512i SUBSH_MASK2 = { 0x0d000306090c0f02, 0x05080b0e0104070a,
|
||||
0x1d101316191c1f12, 0x15181b1e1114171a,
|
||||
0x2d202326292c2f22, 0x25282b2e2124272a,
|
||||
0x3d303336393c3f32, 0x35383b3e3134373a };
|
||||
|
||||
static const __m512i SUBSH_MASK3 = { 0x0e0104070a0d0003, 0x06090c0f0205080b,
|
||||
0x1e1114171a1d1013, 0x16191c1f1215181b,
|
||||
0x2e2124272a2d2023, 0x26292c2f2225282b,
|
||||
0x3e3134373a3d3033, 0x36393c3f3235383b };
|
||||
|
||||
static const __m512i SUBSH_MASK4 = { 0x0f0205080b0e0104, 0x070a0d000306090c,
|
||||
0x1f1215181b1e1114, 0x171a1d101316191c,
|
||||
0x2f2225282b2e2124, 0x272a2d202326292c,
|
||||
0x3f3235383b3e3134, 0x373a3d303336393c };
|
||||
|
||||
static const __m512i SUBSH_MASK5 = { 0x000306090c0f0205, 0x080b0e0104070a0d,
|
||||
0x101316191c1f1215, 0x181b1e1114171a1d,
|
||||
0x202326292c2f2225, 0x282b2e2124272a2d,
|
||||
0x303336393c3f3235, 0x383b3e3134373a3d };
|
||||
|
||||
static const __m512i SUBSH_MASK6 = { 0x0104070a0d000306, 0x090c0f0205080b0e,
|
||||
0x1114171a1d101316, 0x191c1f1215181b1e,
|
||||
0x2124272a2d202326, 0x292c2f2225282b2e,
|
||||
0x3134373a3d303336, 0x393c3f3235383b3e };
|
||||
|
||||
static const __m512i SUBSH_MASK7 = { 0x06090c0f0205080b, 0x0e0104070a0d0003,
|
||||
0x16191c1f1215181b, 0x1e1114171a1d1013,
|
||||
0x26292c2f2225282b, 0x2e2124272a2d2023,
|
||||
0x36393c3f3235383b, 0x3e3134373a3d3033 };
|
||||
|
||||
#define tos(a) #a
|
||||
#define tostr(a) tos(a)
|
||||
@@ -155,69 +225,6 @@ __m512i ALL_FF;
|
||||
b1 = _mm512_xor_si512(b1, a4);\
|
||||
}/*MixBytes*/
|
||||
|
||||
// calculate the round constants seperately and load at startup
|
||||
|
||||
#define SET_CONSTANTS(){\
|
||||
ALL_FF = _mm512_set1_epi32( 0xffffffff );\
|
||||
ALL_1B = _mm512_set1_epi32( 0x1b1b1b1b );\
|
||||
TRANSP_MASK = _mm512_set_epi32( \
|
||||
0x3f373b33, 0x3e363a32, 0x3d353931, 0x3c343830, \
|
||||
0x2f272b23, 0x2e262a22, 0x2d252921, 0x2c242820, \
|
||||
0x1f171b13, 0x1e161a12, 0x1d151911, 0x1c141810, \
|
||||
0x0f070b03, 0x0e060a02, 0x0d050901, 0x0c040800 ); \
|
||||
SUBSH_MASK[0] = _mm512_set_epi32( \
|
||||
0x3336393c, 0x3f323538, 0x3b3e3134, 0x373a3d30, \
|
||||
0x2326292c, 0x2f222528, 0x2b2e2124, 0x272a2d20, \
|
||||
0x1316191c, 0x1f121518, 0x1b1e1114, 0x171a1d10, \
|
||||
0x0306090c, 0x0f020508, 0x0b0e0104, 0x070a0d00 ); \
|
||||
SUBSH_MASK[1] = _mm512_set_epi32( \
|
||||
0x34373a3d, 0x30333639, 0x3c3f3235, 0x383b3e31, \
|
||||
0x24272a2d, 0x20232629, 0x2c2f2225, 0x282b2e21, \
|
||||
0x14171a1d, 0x10131619, 0x1c1f1215, 0x181b1e11, \
|
||||
0x04070a0d, 0x00030609, 0x0c0f0205, 0x080b0e01 ); \
|
||||
SUBSH_MASK[2] = _mm512_set_epi32( \
|
||||
0x35383b3e, 0x3134373a, 0x3d303336, 0x393c3f32, \
|
||||
0x25282b2e, 0x2124272a, 0x2d202326, 0x292c2f22, \
|
||||
0x15181b1e, 0x1114171a, 0x1d101316, 0x191c1f12, \
|
||||
0x05080b0e, 0x0104070a, 0x0d000306, 0x090c0f02 ); \
|
||||
SUBSH_MASK[3] = _mm512_set_epi32( \
|
||||
0x36393c3f, 0x3235383b, 0x3e313437, 0x3a3d3033, \
|
||||
0x26292c2f, 0x2225282b, 0x2e212427, 0x2a2d2023, \
|
||||
0x16191c1f, 0x1215181b, 0x1e111417, 0x1a1d1013, \
|
||||
0x06090c0f, 0x0205080b, 0x0e010407, 0x0a0d0003 ); \
|
||||
SUBSH_MASK[4] = _mm512_set_epi32( \
|
||||
0x373a3d30, 0x3336393c, 0x3f323538, 0x3b3e3134, \
|
||||
0x272a2d20, 0x2326292c, 0x2f222528, 0x2b2e2124, \
|
||||
0x171a1d10, 0x1316191c, 0x1f121518, 0x1b1e1114, \
|
||||
0x070a0d00, 0x0306090c, 0x0f020508, 0x0b0e0104 ); \
|
||||
SUBSH_MASK[5] = _mm512_set_epi32( \
|
||||
0x383b3e31, 0x34373a3d, 0x30333639, 0x3c3f3235, \
|
||||
0x282b2e21, 0x24272a2d, 0x20232629, 0x2c2f2225, \
|
||||
0x181b1e11, 0x14171a1d, 0x10131619, 0x1c1f1215, \
|
||||
0x080b0e01, 0x04070a0d, 0x00030609, 0x0c0f0205 ); \
|
||||
SUBSH_MASK[6] = _mm512_set_epi32( \
|
||||
0x393c3f32, 0x35383b3e, 0x3134373a, 0x3d303336, \
|
||||
0x292c2f22, 0x25282b2e, 0x2124272a, 0x2d202326, \
|
||||
0x191c1f12, 0x15181b1e, 0x1114171a, 0x1d101316, \
|
||||
0x090c0f02, 0x05080b0e, 0x0104070a, 0x0d000306 ); \
|
||||
SUBSH_MASK[7] = _mm512_set_epi32( \
|
||||
0x3e313437, 0x3a3d3033, 0x36393c3f, 0x3235383b, \
|
||||
0x2e212427, 0x2a2d2023, 0x26292c2f, 0x2225282b, \
|
||||
0x1e111417, 0x1a1d1013, 0x16191c1f, 0x1215181b, \
|
||||
0x0e010407, 0x0a0d0003, 0x06090c0f, 0x0205080b ); \
|
||||
for( i = 0; i < ROUNDS1024; i++ ) \
|
||||
{ \
|
||||
ROUND_CONST_P[i] = _mm512_set4_epi32( 0xf0e0d0c0 ^ (i * 0x01010101), \
|
||||
0xb0a09080 ^ (i * 0x01010101), \
|
||||
0x70605040 ^ (i * 0x01010101), \
|
||||
0x30201000 ^ (i * 0x01010101) ); \
|
||||
ROUND_CONST_Q[i] = _mm512_set4_epi32( 0x0f1f2f3f ^ (i * 0x01010101), \
|
||||
0x4f5f6f7f ^ (i * 0x01010101), \
|
||||
0x8f9fafbf ^ (i * 0x01010101), \
|
||||
0xcfdfefff ^ (i * 0x01010101));\
|
||||
} \
|
||||
}while(0);\
|
||||
|
||||
/* one round
|
||||
* a0-a7 = input rows
|
||||
* b0-b7 = output rows
|
||||
@@ -242,30 +249,32 @@ __m512i ALL_FF;
|
||||
for ( round_counter = 0; round_counter < 14; round_counter += 2 ) \
|
||||
{ \
|
||||
/* AddRoundConstant P1024 */\
|
||||
xmm8 = _mm512_xor_si512( xmm8, ( ROUND_CONST_P[ round_counter ] ) );\
|
||||
xmm8 = _mm512_xor_si512( xmm8, m512_const1_128( \
|
||||
casti_m128i( round_const_p, round_counter ) ) ); \
|
||||
/* ShiftBytes P1024 + pre-AESENCLAST */\
|
||||
xmm8 = _mm512_shuffle_epi8( xmm8, ( SUBSH_MASK[0] ) );\
|
||||
xmm9 = _mm512_shuffle_epi8( xmm9, ( SUBSH_MASK[1] ) );\
|
||||
xmm10 = _mm512_shuffle_epi8( xmm10, ( SUBSH_MASK[2] ) );\
|
||||
xmm11 = _mm512_shuffle_epi8( xmm11, ( SUBSH_MASK[3] ) );\
|
||||
xmm12 = _mm512_shuffle_epi8( xmm12, ( SUBSH_MASK[4] ) );\
|
||||
xmm13 = _mm512_shuffle_epi8( xmm13, ( SUBSH_MASK[5] ) );\
|
||||
xmm14 = _mm512_shuffle_epi8( xmm14, ( SUBSH_MASK[6] ) );\
|
||||
xmm15 = _mm512_shuffle_epi8( xmm15, ( SUBSH_MASK[7] ) );\
|
||||
xmm8 = _mm512_shuffle_epi8( xmm8, SUBSH_MASK0 ); \
|
||||
xmm9 = _mm512_shuffle_epi8( xmm9, SUBSH_MASK1 );\
|
||||
xmm10 = _mm512_shuffle_epi8( xmm10, SUBSH_MASK2 );\
|
||||
xmm11 = _mm512_shuffle_epi8( xmm11, SUBSH_MASK3 );\
|
||||
xmm12 = _mm512_shuffle_epi8( xmm12, SUBSH_MASK4 );\
|
||||
xmm13 = _mm512_shuffle_epi8( xmm13, SUBSH_MASK5 );\
|
||||
xmm14 = _mm512_shuffle_epi8( xmm14, SUBSH_MASK6 );\
|
||||
xmm15 = _mm512_shuffle_epi8( xmm15, SUBSH_MASK7 );\
|
||||
/* SubBytes + MixBytes */\
|
||||
SUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
|
||||
\
|
||||
/* AddRoundConstant P1024 */\
|
||||
xmm0 = _mm512_xor_si512( xmm0, ( ROUND_CONST_P[ round_counter+1 ] ) );\
|
||||
xmm0 = _mm512_xor_si512( xmm0, m512_const1_128( \
|
||||
casti_m128i( round_const_p, round_counter+1 ) ) ); \
|
||||
/* ShiftBytes P1024 + pre-AESENCLAST */\
|
||||
xmm0 = _mm512_shuffle_epi8( xmm0, ( SUBSH_MASK[0] ) );\
|
||||
xmm1 = _mm512_shuffle_epi8( xmm1, ( SUBSH_MASK[1] ) );\
|
||||
xmm2 = _mm512_shuffle_epi8( xmm2, ( SUBSH_MASK[2] ) );\
|
||||
xmm3 = _mm512_shuffle_epi8( xmm3, ( SUBSH_MASK[3] ) );\
|
||||
xmm4 = _mm512_shuffle_epi8( xmm4, ( SUBSH_MASK[4] ) );\
|
||||
xmm5 = _mm512_shuffle_epi8( xmm5, ( SUBSH_MASK[5] ) );\
|
||||
xmm6 = _mm512_shuffle_epi8( xmm6, ( SUBSH_MASK[6] ) );\
|
||||
xmm7 = _mm512_shuffle_epi8( xmm7, ( SUBSH_MASK[7] ) );\
|
||||
xmm0 = _mm512_shuffle_epi8( xmm0, SUBSH_MASK0 );\
|
||||
xmm1 = _mm512_shuffle_epi8( xmm1, SUBSH_MASK1 );\
|
||||
xmm2 = _mm512_shuffle_epi8( xmm2, SUBSH_MASK2 );\
|
||||
xmm3 = _mm512_shuffle_epi8( xmm3, SUBSH_MASK3 );\
|
||||
xmm4 = _mm512_shuffle_epi8( xmm4, SUBSH_MASK4 );\
|
||||
xmm5 = _mm512_shuffle_epi8( xmm5, SUBSH_MASK5 );\
|
||||
xmm6 = _mm512_shuffle_epi8( xmm6, SUBSH_MASK6 );\
|
||||
xmm7 = _mm512_shuffle_epi8( xmm7, SUBSH_MASK7 );\
|
||||
/* SubBytes + MixBytes */\
|
||||
SUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
|
||||
}\
|
||||
@@ -284,16 +293,17 @@ __m512i ALL_FF;
|
||||
xmm12 = _mm512_xor_si512( xmm12, xmm1 );\
|
||||
xmm13 = _mm512_xor_si512( xmm13, xmm1 );\
|
||||
xmm14 = _mm512_xor_si512( xmm14, xmm1 );\
|
||||
xmm15 = _mm512_xor_si512( xmm15, ( ROUND_CONST_Q[ round_counter ] ) );\
|
||||
xmm15 = _mm512_xor_si512( xmm15, m512_const1_128( \
|
||||
casti_m128i( round_const_q, round_counter ) ) ); \
|
||||
/* ShiftBytes Q1024 + pre-AESENCLAST */\
|
||||
xmm8 = _mm512_shuffle_epi8( xmm8, ( SUBSH_MASK[1] ) );\
|
||||
xmm9 = _mm512_shuffle_epi8( xmm9, ( SUBSH_MASK[3] ) );\
|
||||
xmm10 = _mm512_shuffle_epi8( xmm10, ( SUBSH_MASK[5] ) );\
|
||||
xmm11 = _mm512_shuffle_epi8( xmm11, ( SUBSH_MASK[7] ) );\
|
||||
xmm12 = _mm512_shuffle_epi8( xmm12, ( SUBSH_MASK[0] ) );\
|
||||
xmm13 = _mm512_shuffle_epi8( xmm13, ( SUBSH_MASK[2] ) );\
|
||||
xmm14 = _mm512_shuffle_epi8( xmm14, ( SUBSH_MASK[4] ) );\
|
||||
xmm15 = _mm512_shuffle_epi8( xmm15, ( SUBSH_MASK[6] ) );\
|
||||
xmm8 = _mm512_shuffle_epi8( xmm8, SUBSH_MASK1 );\
|
||||
xmm9 = _mm512_shuffle_epi8( xmm9, SUBSH_MASK3 );\
|
||||
xmm10 = _mm512_shuffle_epi8( xmm10, SUBSH_MASK5 );\
|
||||
xmm11 = _mm512_shuffle_epi8( xmm11, SUBSH_MASK7 );\
|
||||
xmm12 = _mm512_shuffle_epi8( xmm12, SUBSH_MASK0 );\
|
||||
xmm13 = _mm512_shuffle_epi8( xmm13, SUBSH_MASK2 );\
|
||||
xmm14 = _mm512_shuffle_epi8( xmm14, SUBSH_MASK4 );\
|
||||
xmm15 = _mm512_shuffle_epi8( xmm15, SUBSH_MASK6 );\
|
||||
/* SubBytes + MixBytes */\
|
||||
SUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
|
||||
\
|
||||
@@ -306,16 +316,17 @@ __m512i ALL_FF;
|
||||
xmm4 = _mm512_xor_si512( xmm4, xmm9 );\
|
||||
xmm5 = _mm512_xor_si512( xmm5, xmm9 );\
|
||||
xmm6 = _mm512_xor_si512( xmm6, xmm9 );\
|
||||
xmm7 = _mm512_xor_si512( xmm7, ( ROUND_CONST_Q[ round_counter+1 ] ) );\
|
||||
xmm7 = _mm512_xor_si512( xmm7, m512_const1_128( \
|
||||
casti_m128i( round_const_q, round_counter+1 ) ) ); \
|
||||
/* ShiftBytes Q1024 + pre-AESENCLAST */\
|
||||
xmm0 = _mm512_shuffle_epi8( xmm0, ( SUBSH_MASK[1] ) );\
|
||||
xmm1 = _mm512_shuffle_epi8( xmm1, ( SUBSH_MASK[3] ) );\
|
||||
xmm2 = _mm512_shuffle_epi8( xmm2, ( SUBSH_MASK[5] ) );\
|
||||
xmm3 = _mm512_shuffle_epi8( xmm3, ( SUBSH_MASK[7] ) );\
|
||||
xmm4 = _mm512_shuffle_epi8( xmm4, ( SUBSH_MASK[0] ) );\
|
||||
xmm5 = _mm512_shuffle_epi8( xmm5, ( SUBSH_MASK[2] ) );\
|
||||
xmm6 = _mm512_shuffle_epi8( xmm6, ( SUBSH_MASK[4] ) );\
|
||||
xmm7 = _mm512_shuffle_epi8( xmm7, ( SUBSH_MASK[6] ) );\
|
||||
xmm0 = _mm512_shuffle_epi8( xmm0, SUBSH_MASK1 );\
|
||||
xmm1 = _mm512_shuffle_epi8( xmm1, SUBSH_MASK3 );\
|
||||
xmm2 = _mm512_shuffle_epi8( xmm2, SUBSH_MASK5 );\
|
||||
xmm3 = _mm512_shuffle_epi8( xmm3, SUBSH_MASK7 );\
|
||||
xmm4 = _mm512_shuffle_epi8( xmm4, SUBSH_MASK0 );\
|
||||
xmm5 = _mm512_shuffle_epi8( xmm5, SUBSH_MASK2 );\
|
||||
xmm6 = _mm512_shuffle_epi8( xmm6, SUBSH_MASK4 );\
|
||||
xmm7 = _mm512_shuffle_epi8( xmm7, SUBSH_MASK6 );\
|
||||
/* SubBytes + MixBytes */\
|
||||
SUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
|
||||
}\
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "myrgr-gate.h"
|
||||
|
||||
#if !defined(MYRGR_8WAY) && !defined(MYRGR_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
@@ -86,3 +89,4 @@ int scanhash_myriad( struct work *work, uint32_t max_nonce,
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
@@ -143,7 +143,7 @@ int scanhash_myriad_8way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
@@ -226,7 +226,7 @@ int scanhash_myriad_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -35,6 +35,8 @@
|
||||
|
||||
#include "sph_groestl.h"
|
||||
|
||||
#if !defined(__AES__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
@@ -3116,4 +3118,6 @@ sph_groestl512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
#endif // !AES
|
||||
#endif
|
||||
|
@@ -42,6 +42,7 @@ extern "C"{
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
|
||||
#if !defined(__AES__)
|
||||
/**
|
||||
* Output size (in bits) for Groestl-224.
|
||||
*/
|
||||
@@ -326,4 +327,5 @@ void sph_groestl512_addbits_and_close(
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // !AES
|
||||
#endif
|
||||
|
@@ -1,156 +0,0 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <openssl/sha.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include "sph_hefty1.h"
|
||||
#include "algo/luffa/sph_luffa.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#ifdef __AES__
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
|
||||
void bastionhash(void *output, const void *input)
|
||||
{
|
||||
unsigned char hash[64] __attribute__ ((aligned (64)));
|
||||
|
||||
#ifdef __AES__
|
||||
hashState_echo ctx_echo;
|
||||
#else
|
||||
sph_echo512_context ctx_echo;
|
||||
#endif
|
||||
hashState_luffa ctx_luffa;
|
||||
sph_fugue512_context ctx_fugue;
|
||||
sph_whirlpool_context ctx_whirlpool;
|
||||
sph_shabal512_context ctx_shabal;
|
||||
sph_hamsi512_context ctx_hamsi;
|
||||
sph_skein512_context ctx_skein;
|
||||
|
||||
HEFTY1(input, 80, hash);
|
||||
|
||||
init_luffa( &ctx_luffa, 512 );
|
||||
update_and_final_luffa( &ctx_luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, 64 );
|
||||
|
||||
if (hash[0] & 0x8)
|
||||
{
|
||||
sph_fugue512_init(&ctx_fugue);
|
||||
sph_fugue512(&ctx_fugue, hash, 64);
|
||||
sph_fugue512_close(&ctx_fugue, hash);
|
||||
} else {
|
||||
sph_skein512_init( &ctx_skein );
|
||||
sph_skein512( &ctx_skein, hash, 64 );
|
||||
sph_skein512_close( &ctx_skein, hash );
|
||||
}
|
||||
|
||||
sph_whirlpool_init(&ctx_whirlpool);
|
||||
sph_whirlpool(&ctx_whirlpool, hash, 64);
|
||||
sph_whirlpool_close(&ctx_whirlpool, hash);
|
||||
|
||||
sph_fugue512_init(&ctx_fugue);
|
||||
sph_fugue512(&ctx_fugue, hash, 64);
|
||||
sph_fugue512_close(&ctx_fugue, hash);
|
||||
|
||||
if (hash[0] & 0x8)
|
||||
{
|
||||
#ifdef __AES__
|
||||
init_echo( &ctx_echo, 512 );
|
||||
update_final_echo ( &ctx_echo,(BitSequence*)hash,
|
||||
(const BitSequence*)hash, 512 );
|
||||
#else
|
||||
sph_echo512_init(&ctx_echo);
|
||||
sph_echo512(&ctx_echo, hash, 64);
|
||||
sph_echo512_close(&ctx_echo, hash);
|
||||
#endif
|
||||
} else {
|
||||
init_luffa( &ctx_luffa, 512 );
|
||||
update_and_final_luffa( &ctx_luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, 64 );
|
||||
}
|
||||
|
||||
sph_shabal512_init(&ctx_shabal);
|
||||
sph_shabal512(&ctx_shabal, hash, 64);
|
||||
sph_shabal512_close(&ctx_shabal, hash);
|
||||
|
||||
sph_skein512_init( &ctx_skein );
|
||||
sph_skein512( &ctx_skein, hash, 64 );
|
||||
sph_skein512_close( &ctx_skein, hash );
|
||||
|
||||
if (hash[0] & 0x8)
|
||||
{
|
||||
sph_shabal512_init(&ctx_shabal);
|
||||
sph_shabal512(&ctx_shabal, hash, 64);
|
||||
sph_shabal512_close(&ctx_shabal, hash);
|
||||
} else {
|
||||
sph_whirlpool_init(&ctx_whirlpool);
|
||||
sph_whirlpool(&ctx_whirlpool, hash, 64);
|
||||
sph_whirlpool_close(&ctx_whirlpool, hash);
|
||||
}
|
||||
|
||||
sph_shabal512_init(&ctx_shabal);
|
||||
sph_shabal512(&ctx_shabal, hash, 64);
|
||||
sph_shabal512_close(&ctx_shabal, hash);
|
||||
|
||||
if (hash[0] & 0x8)
|
||||
{
|
||||
sph_hamsi512_init(&ctx_hamsi);
|
||||
sph_hamsi512(&ctx_hamsi, hash, 64);
|
||||
sph_hamsi512_close(&ctx_hamsi, hash);
|
||||
} else {
|
||||
init_luffa( &ctx_luffa, 512 );
|
||||
update_and_final_luffa( &ctx_luffa, (BitSequence*)hash,
|
||||
(const BitSequence*)hash, 64 );
|
||||
}
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_bastion( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t _ALIGN(64) hash32[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
||||
uint32_t n = first_nonce;
|
||||
|
||||
for (int i=0; i < 19; i++)
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
bastionhash(hash32, endiandata);
|
||||
if (hash32[7] < Htarg && fulltest(hash32, ptarget)) {
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash32, mythr );
|
||||
}
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_bastion_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_bastion;
|
||||
gate->hash = (void*)&bastionhash;
|
||||
return true;
|
||||
};
|
||||
|
@@ -1,111 +0,0 @@
|
||||
#include <string.h>
|
||||
#include <openssl/sha.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include "sph_hefty1.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
|
||||
/* Combines top 64-bits from each hash into a single hash */
|
||||
static void combine_hashes(uint32_t *out, uint32_t *hash1, uint32_t *hash2, uint32_t *hash3, uint32_t *hash4)
|
||||
{
|
||||
uint32_t *hash[4] = { hash1, hash2, hash3, hash4 };
|
||||
|
||||
/* Transpose first 64 bits of each hash into out */
|
||||
memset(out, 0, 32);
|
||||
int bits = 0;
|
||||
for (unsigned int i = 7; i >= 6; i--) {
|
||||
for (uint32_t mask = 0x80000000; mask; mask >>= 1) {
|
||||
for (unsigned int k = 0; k < 4; k++) {
|
||||
out[(255 - bits)/32] <<= 1;
|
||||
if ((hash[k][i] & mask) != 0)
|
||||
out[(255 - bits)/32] |= 1;
|
||||
bits++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern void heavyhash(unsigned char* output, const unsigned char* input, int len)
|
||||
{
|
||||
unsigned char hash1[32];
|
||||
HEFTY1(input, len, hash1);
|
||||
|
||||
// HEFTY1 is new, so take an extra security measure to eliminate
|
||||
// * the possiblity of collisions:
|
||||
// *
|
||||
// * Hash(x) = SHA256(x + HEFTY1(x))
|
||||
// *
|
||||
// * N.B. '+' is concatenation.
|
||||
//
|
||||
unsigned char hash2[32];;
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init(&ctx);
|
||||
SHA256_Update(&ctx, input, len);
|
||||
SHA256_Update(&ctx, hash1, sizeof(hash1));
|
||||
SHA256_Final(hash2, &ctx);
|
||||
|
||||
// * Additional security: Do not rely on a single cryptographic hash
|
||||
// * function. Instead, combine the outputs of 4 of the most secure
|
||||
// * cryptographic hash functions-- SHA256, KECCAK512, GROESTL512
|
||||
// * and BLAKE512.
|
||||
|
||||
|
||||
uint32_t hash3[16];
|
||||
sph_keccak512_context keccakCtx;
|
||||
sph_keccak512_init(&keccakCtx);
|
||||
sph_keccak512(&keccakCtx, input, len);
|
||||
sph_keccak512(&keccakCtx, hash1, sizeof(hash1));
|
||||
sph_keccak512_close(&keccakCtx, (void *)&hash3);
|
||||
|
||||
uint32_t hash4[16];
|
||||
sph_groestl512_context groestlCtx;
|
||||
sph_groestl512_init(&groestlCtx);
|
||||
sph_groestl512(&groestlCtx, input, len);
|
||||
sph_groestl512(&groestlCtx, hash1, sizeof(hash1));
|
||||
sph_groestl512_close(&groestlCtx, (void *)&hash4);
|
||||
|
||||
uint32_t hash5[16];
|
||||
sph_blake512_context blakeCtx;
|
||||
sph_blake512_init(&blakeCtx);
|
||||
sph_blake512(&blakeCtx, input, len);
|
||||
sph_blake512(&blakeCtx, (unsigned char *)&hash1, sizeof(hash1));
|
||||
sph_blake512_close(&blakeCtx, (void *)&hash5);
|
||||
|
||||
uint32_t *final = (uint32_t *)output;
|
||||
combine_hashes(final, (uint32_t *)hash2, hash3, hash4, hash5);
|
||||
|
||||
}
|
||||
|
||||
int scanhash_heavy( uint32_t *pdata, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[8];
|
||||
uint32_t start_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
do {
|
||||
heavyhash((unsigned char *)hash, (unsigned char *)pdata, 80);
|
||||
|
||||
if (hash[7] <= ptarget[7]) {
|
||||
if (fulltest(hash, ptarget)) {
|
||||
*hashes_done = pdata[19] - start_nonce;
|
||||
return 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
pdata[19]++;
|
||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||
*hashes_done = pdata[19] - start_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_heavy_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_heavy;
|
||||
gate->hash = (void*)&heavyhash;
|
||||
return true;
|
||||
};
|
||||
|
@@ -144,7 +144,7 @@ int hodl_scanhash( struct work* work, uint32_t max_nonce,
|
||||
#if defined(__AES__)
|
||||
GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, mythr->id );
|
||||
pthread_barrier_wait( &hodl_barrier );
|
||||
return scanhash_hodl_wolf( work, max_nonce, hashes_done, thr_info );
|
||||
return scanhash_hodl_wolf( work, max_nonce, hashes_done, mythr );
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
@@ -129,9 +129,10 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
|
||||
if( FinalPoW[7] <= ptarget[7] )
|
||||
{
|
||||
pdata[20] = swab32( BlockHdr[20] );
|
||||
pdata[21] = swab32( BlockHdr[21] );
|
||||
*hashes_done = CollisionCount;
|
||||
return(1);
|
||||
pdata[21] = swab32( BlockHdr[21] );
|
||||
*hashes_done = CollisionCount;
|
||||
submit_solution( work, FinalPoW, mythr );
|
||||
return(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -198,7 +199,8 @@ int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
|
||||
pdata[20] = swab32( BlockHdr[20] );
|
||||
pdata[21] = swab32( BlockHdr[21] );
|
||||
*hashes_done = CollisionCount;
|
||||
return(1);
|
||||
submit_solution( work, FinalPoW, mythr );
|
||||
return(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -129,7 +129,7 @@ int scanhash_jha_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "jha-gate.h"
|
||||
|
||||
#if !defined(JHA_8WAY) && !defined(JHA_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -133,3 +136,4 @@ int scanhash_jha( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -45,7 +45,7 @@ int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
@@ -97,7 +97,7 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( valid_hash( lane_hash, ptarget ))
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
|
@@ -3,6 +3,8 @@
|
||||
#include "keccak-hash-4way.h"
|
||||
#include "keccak-gate.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
static const uint64_t RC[] = {
|
||||
0x0000000000000001, 0x0000000000008082,
|
||||
0x800000000000808A, 0x8000000080008000,
|
||||
@@ -239,7 +241,7 @@ keccak512_8way_close(void *cc, void *dst)
|
||||
|
||||
#endif // AVX512
|
||||
|
||||
#if defined(__AVX2__)
|
||||
// AVX2
|
||||
|
||||
#define INPUT_BUF(size) do { \
|
||||
size_t j; \
|
||||
|
@@ -1,4 +1,6 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "keccak-gate.h"
|
||||
|
||||
#if !defined(KECCAK_8WAY) && !defined(KECCAK_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@@ -49,3 +51,4 @@ int scanhash_keccak( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -52,7 +52,7 @@ int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
@@ -111,7 +111,7 @@ int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "keccak-gate.h"
|
||||
|
||||
#if !defined(KECCAK_8WAY) && !defined(KECCAK_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
@@ -48,3 +51,4 @@ int scanhash_sha3d( struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -1,63 +0,0 @@
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "sph_luffa.h"
|
||||
|
||||
void luffahash(void *output, const void *input)
|
||||
{
|
||||
unsigned char _ALIGN(128) hash[64];
|
||||
sph_luffa512_context ctx_luffa;
|
||||
|
||||
sph_luffa512_init(&ctx_luffa);
|
||||
sph_luffa512 (&ctx_luffa, input, 80);
|
||||
sph_luffa512_close(&ctx_luffa, (void*) hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_luffa(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) hash64[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
||||
uint32_t n = first_nonce;
|
||||
|
||||
for (int i=0; i < 19; i++)
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
luffahash(hash64, endiandata);
|
||||
if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return true;
|
||||
}
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_luffa_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_luffa;
|
||||
gate->hash = (void*)&luffahash;
|
||||
return true;
|
||||
};
|
||||
|
@@ -1,3 +1,6 @@
|
||||
#if !defined(LUFFA_FOR_SSE2_H__)
|
||||
#define LUFFA_FOR_SSE2_H__ 1
|
||||
|
||||
/*
|
||||
* luffa_for_sse2.h
|
||||
* Version 2.0 (Sep 15th 2009)
|
||||
@@ -48,8 +51,6 @@
|
||||
typedef struct {
|
||||
uint32 buffer[8] __attribute((aligned(32)));
|
||||
__m128i chainv[10] __attribute((aligned(32))); /* Chaining values */
|
||||
// uint64 bitlen[2]; /* Message length in bits */
|
||||
// uint32 rembitlen; /* Length of buffer data to be hashed */
|
||||
int hashbitlen;
|
||||
int rembytes;
|
||||
} hashState_luffa;
|
||||
@@ -67,4 +68,4 @@ HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output,
|
||||
|
||||
int luffa_full( hashState_luffa *state, BitSequence* output, int hashbitlen,
|
||||
const BitSequence* data, size_t inlen );
|
||||
|
||||
#endif // LUFFA_FOR_SSE2_H___
|
||||
|
@@ -115,9 +115,8 @@ void allium_16way_hash( void *state, const void *input )
|
||||
intrlv_4x128( vhashA, hash0, hash1, hash2, hash3, 256 );
|
||||
intrlv_4x128( vhashB, hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
cube_4way_full( &ctx.cube, vhashA, 256, vhashA, 32 );
|
||||
cube_4way_full( &ctx.cube, vhashB, 256, vhashB, 32 );
|
||||
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
@@ -125,10 +124,8 @@ void allium_16way_hash( void *state, const void *input )
|
||||
intrlv_4x128( vhashA, hash8, hash9, hash10, hash11, 256 );
|
||||
intrlv_4x128( vhashB, hash12, hash13, hash14, hash15, 256 );
|
||||
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
cube_4way_full( &ctx.cube, vhashA, 256, vhashA, 32 );
|
||||
cube_4way_full( &ctx.cube, vhashB, 256, vhashB, 32 );
|
||||
|
||||
dintrlv_4x128( hash8, hash9, hash10, hash11, vhashA, 256 );
|
||||
dintrlv_4x128( hash12, hash13, hash14, hash15, vhashB, 256 );
|
||||
@@ -169,7 +166,6 @@ void allium_16way_hash( void *state, const void *input )
|
||||
skein256_8way_update( &ctx.skein, vhashB, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhashB );
|
||||
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhashA, 256 );
|
||||
dintrlv_8x64( hash8, hash9, hash10, hash11, hash12, hash13, hash14, hash15,
|
||||
@@ -179,77 +175,43 @@ void allium_16way_hash( void *state, const void *input )
|
||||
|
||||
intrlv_4x128( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
|
||||
groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 );
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
|
||||
|
||||
dintrlv_4x128( state, state+32, state+64, state+96, vhash, 256 );
|
||||
intrlv_4x128( vhash, hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
groestl256_4way_init( &ctx.groestl, 32 );
|
||||
groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 );
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
|
||||
|
||||
dintrlv_4x128( state+128, state+160, state+192, state+224, vhash, 256 );
|
||||
intrlv_4x128( vhash, hash8, hash9, hash10, hash11, 256 );
|
||||
|
||||
groestl256_4way_init( &ctx.groestl, 32 );
|
||||
groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 );
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
|
||||
|
||||
dintrlv_4x128( state+256, state+288, state+320, state+352, vhash, 256 );
|
||||
intrlv_4x128( vhash, hash12, hash13, hash14, hash15, 256 );
|
||||
|
||||
groestl256_4way_init( &ctx.groestl, 32 );
|
||||
groestl256_4way_update_close( &ctx.groestl, vhash, vhash, 256 );
|
||||
groestl256_4way_full( &ctx.groestl, vhash, vhash, 256 );
|
||||
|
||||
dintrlv_4x128( state+384, state+416, state+448, state+480, vhash, 256 );
|
||||
|
||||
#else
|
||||
|
||||
update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+32, hash1, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+64, hash2, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+128, hash4, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+160, hash5, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+192, hash6, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+224, hash7, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+256, hash8, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+288, hash9, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+320, hash10, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+352, hash11, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+384, hash12, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+416, hash13, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+448, hash14, 256 );
|
||||
memcpy( &ctx.groestl, &allium_16way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+480, hash15, 256 );
|
||||
|
||||
groestl256_full( &ctx.groestl, state, hash0, 256 );
|
||||
groestl256_full( &ctx.groestl, state+32, hash1, 256 );
|
||||
groestl256_full( &ctx.groestl, state+64, hash2, 256 );
|
||||
groestl256_full( &ctx.groestl, state+96, hash3, 256 );
|
||||
groestl256_full( &ctx.groestl, state+128, hash4, 256 );
|
||||
groestl256_full( &ctx.groestl, state+160, hash5, 256 );
|
||||
groestl256_full( &ctx.groestl, state+192, hash6, 256 );
|
||||
groestl256_full( &ctx.groestl, state+224, hash7, 256 );
|
||||
groestl256_full( &ctx.groestl, state+256, hash8, 256 );
|
||||
groestl256_full( &ctx.groestl, state+288, hash9, 256 );
|
||||
groestl256_full( &ctx.groestl, state+320, hash10, 256 );
|
||||
groestl256_full( &ctx.groestl, state+352, hash11, 256 );
|
||||
groestl256_full( &ctx.groestl, state+384, hash12, 256 );
|
||||
groestl256_full( &ctx.groestl, state+416, hash13, 256 );
|
||||
groestl256_full( &ctx.groestl, state+448, hash14, 256 );
|
||||
groestl256_full( &ctx.groestl, state+480, hash15, 256 );
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -283,7 +245,7 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
|
||||
if ( unlikely( valid_hash( hash+(lane<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
||||
submit_solution( work, hash+(lane<<3), mythr );
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||
n += 16;
|
||||
@@ -393,28 +355,14 @@ void allium_8way_hash( void *hash, const void *input )
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x64( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
|
||||
update_and_final_groestl256( &ctx.groestl, hash0, hash0, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash1, hash1, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash2, hash2, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash3, hash3, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash4, hash4, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash5, hash5, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash6, hash6, 256 );
|
||||
memcpy( &ctx.groestl, &allium_8way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash7, hash7, 256 );
|
||||
groestl256_full( &ctx.groestl, hash0, hash0, 256 );
|
||||
groestl256_full( &ctx.groestl, hash1, hash1, 256 );
|
||||
groestl256_full( &ctx.groestl, hash2, hash2, 256 );
|
||||
groestl256_full( &ctx.groestl, hash3, hash3, 256 );
|
||||
groestl256_full( &ctx.groestl, hash4, hash4, 256 );
|
||||
groestl256_full( &ctx.groestl, hash5, hash5, 256 );
|
||||
groestl256_full( &ctx.groestl, hash6, hash6, 256 );
|
||||
groestl256_full( &ctx.groestl, hash7, hash7, 256 );
|
||||
}
|
||||
|
||||
int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
@@ -446,7 +394,7 @@ int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "lyra2-gate.h"
|
||||
|
||||
#if !( defined(ALLIUM_16WAY) || defined(ALLIUM_8WAY) || defined(ALLIUM_4WAY) )
|
||||
|
||||
#include <memory.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
@@ -73,37 +76,35 @@ int scanhash_allium( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t _ALIGN(128) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x3ffff;
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
edata[i] = bswap_32( pdata[i] );
|
||||
|
||||
sph_blake256_init( &allium_ctx.blake );
|
||||
sph_blake256( &allium_ctx.blake, endiandata, 64 );
|
||||
sph_blake256( &allium_ctx.blake, edata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], nonce );
|
||||
allium_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
edata[19] = nonce;
|
||||
allium_hash( hash, edata );
|
||||
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
pdata[19] = bswap_32( nonce );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -94,12 +94,12 @@ bool lyra2rev2_thread_init()
|
||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||
#if defined (LYRA2REV2_8WAY)
|
||||
#if defined (LYRA2REV2_16WAY)
|
||||
l2v2_wholeMatrix = _mm_malloc( 2 * size, 64 ); // 2 way
|
||||
init_lyra2rev2_8way_ctx();;
|
||||
#elif defined (LYRA2REV2_4WAY)
|
||||
init_lyra2rev2_16way_ctx();;
|
||||
#elif defined (LYRA2REV2_8WAY)
|
||||
l2v2_wholeMatrix = _mm_malloc( size, 64 );
|
||||
init_lyra2rev2_4way_ctx();;
|
||||
init_lyra2rev2_8way_ctx();;
|
||||
#else
|
||||
l2v2_wholeMatrix = _mm_malloc( size, 64 );
|
||||
init_lyra2rev2_ctx();
|
||||
@@ -109,17 +109,17 @@ bool lyra2rev2_thread_init()
|
||||
|
||||
bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (LYRA2REV2_8WAY)
|
||||
#if defined (LYRA2REV2_16WAY)
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2_16way;
|
||||
gate->hash = (void*)&lyra2rev2_16way_hash;
|
||||
#elif defined (LYRA2REV2_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2_8way;
|
||||
gate->hash = (void*)&lyra2rev2_8way_hash;
|
||||
#elif defined (LYRA2REV2_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2_4way;
|
||||
gate->hash = (void*)&lyra2rev2_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_lyra2rev2;
|
||||
gate->hash = (void*)&lyra2rev2_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
@@ -228,13 +228,14 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
|
||||
bool register_phi2_algo( algo_gate_t* gate )
|
||||
{
|
||||
// init_phi2_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
gate->get_work_data_size = (void*)&phi2_get_work_data_size;
|
||||
gate->decode_extra_data = (void*)&phi2_decode_extra_data;
|
||||
gate->build_extraheader = (void*)&phi2_build_extraheader;
|
||||
opt_target_factor = 256.0;
|
||||
#if defined(PHI2_4WAY)
|
||||
#if defined(PHI2_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_phi2_8way;
|
||||
#elif defined(PHI2_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_phi2_4way;
|
||||
#else
|
||||
init_phi2_ctx();
|
||||
|
@@ -51,28 +51,29 @@ bool init_lyra2rev3_ctx();
|
||||
//////////////////////////////////
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define LYRA2REV2_8WAY 1
|
||||
#define LYRA2REV2_16WAY 1
|
||||
#elif defined(__AVX2__)
|
||||
#define LYRA2REV2_4WAY 1
|
||||
#define LYRA2REV2_8WAY 1
|
||||
#endif
|
||||
|
||||
extern __thread uint64_t* l2v2_wholeMatrix;
|
||||
|
||||
bool register_lyra2rev2_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(LYRA2REV2_8WAY)
|
||||
#if defined(LYRA2REV2_16WAY)
|
||||
|
||||
void lyra2rev2_16way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev2_16way_ctx();
|
||||
|
||||
#elif defined(LYRA2REV2_8WAY)
|
||||
|
||||
void lyra2rev2_8way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev2_8way_ctx();
|
||||
|
||||
#elif defined(LYRA2REV2_4WAY)
|
||||
|
||||
void lyra2rev2_4way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev2_4way_ctx();
|
||||
|
||||
#else
|
||||
|
||||
@@ -185,19 +186,26 @@ bool init_allium_ctx();
|
||||
|
||||
/////////////////////////////////////////
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
// #define PHI2_4WAY
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define PHI2_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define PHI2_4WAY 1
|
||||
#endif
|
||||
|
||||
extern bool phi2_has_roots;
|
||||
|
||||
bool register_phi2_algo( algo_gate_t* gate );
|
||||
#if defined(PHI2_4WAY)
|
||||
#if defined(PHI2_8WAY)
|
||||
|
||||
void phi2_8way_hash( void *state, const void *input );
|
||||
int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(PHI2_4WAY)
|
||||
|
||||
void phi2_hash_4way( void *state, const void *input );
|
||||
int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
//void init_phi2_ctx();
|
||||
|
||||
#else
|
||||
|
||||
|
@@ -76,7 +76,7 @@ int scanhash_lyra2h_4way( struct work *work, uint32_t max_nonce,
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "lyra2-gate.h"
|
||||
|
||||
#if !( defined(LYRA2H_8WAY) || defined(LYRA2H_4WAY) )
|
||||
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
#include "lyra2.h"
|
||||
@@ -71,3 +74,4 @@ int scanhash_lyra2h( struct work *work, uint32_t max_nonce,
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
@@ -7,23 +7,227 @@
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/cubehash/cube-hash-2way.h"
|
||||
|
||||
#if defined (LYRA2REV2_8WAY)
|
||||
|
||||
#if defined (LYRA2REV2_16WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_16way_context blake;
|
||||
keccak256_8way_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_8way_context skein;
|
||||
bmw256_16way_context bmw;
|
||||
} lyra2v2_16way_ctx_holder __attribute__ ((aligned (64)));
|
||||
|
||||
static lyra2v2_16way_ctx_holder l2v2_16way_ctx;
|
||||
|
||||
bool init_lyra2rev2_16way_ctx()
|
||||
{
|
||||
keccak256_8way_init( &l2v2_16way_ctx.keccak );
|
||||
cubehashInit( &l2v2_16way_ctx.cube, 256, 16, 32 );
|
||||
skein256_8way_init( &l2v2_16way_ctx.skein );
|
||||
bmw256_16way_init( &l2v2_16way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
|
||||
void lyra2rev2_16way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash8[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash9[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash10[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash11[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash12[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash13[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash14[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash15[8] __attribute__ ((aligned (64)));
|
||||
lyra2v2_16way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &l2v2_16way_ctx, sizeof(l2v2_16way_ctx) );
|
||||
|
||||
blake256_16way_update( &ctx.blake, input + (64<<4), 16 );
|
||||
blake256_16way_close( &ctx.blake, vhash );
|
||||
|
||||
dintrlv_16x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7,
|
||||
hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, vhash, 256 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
keccak256_8way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
intrlv_8x64( vhash, hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, 256 );
|
||||
|
||||
keccak256_8way_init( &ctx.keccak );
|
||||
keccak256_8way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_8x64( hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash5, vhash, 256 );
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) hash14, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 );
|
||||
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash8, hash9, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash8, hash9, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash10, hash11, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash10, hash11, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash12, hash13, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash12, hash13, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash14, hash15, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash14, hash15, vhash, 256 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, 256 );
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
intrlv_8x64( vhash, hash8, hash9, hash10, hash11, hash12,
|
||||
hash13, hash14, hash15, 256 );
|
||||
|
||||
skein256_8way_init( &ctx.skein );
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
|
||||
dintrlv_8x64( hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, vhash, 256 );
|
||||
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) hash14, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 );
|
||||
|
||||
intrlv_16x32( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7,
|
||||
hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash15, 256 );
|
||||
|
||||
bmw256_16way_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_16way_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &hash[7*16];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m512i *noncev = (__m512i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0000ff;
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n );
|
||||
blake256_16way_init( &l2v2_16way_ctx.blake );
|
||||
blake256_16way_update( &l2v2_16way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
lyra2rev2_16way_hash( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||
{
|
||||
extr_lane_16x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||
n += 16;
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined (LYRA2REV2_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake256_8way_context blake;
|
||||
keccak256_8way_context keccak;
|
||||
cube_4way_context cube;
|
||||
skein256_8way_context skein;
|
||||
bmw256_8way_context bmw;
|
||||
keccak256_4way_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_4way_context skein;
|
||||
bmw256_8way_context bmw;
|
||||
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
|
||||
|
||||
static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
|
||||
|
||||
bool init_lyra2rev2_8way_ctx()
|
||||
{
|
||||
keccak256_8way_init( &l2v2_8way_ctx.keccak );
|
||||
cube_4way_init( &l2v2_8way_ctx.cube, 256, 16, 32 );
|
||||
skein256_8way_init( &l2v2_8way_ctx.skein );
|
||||
keccak256_4way_init( &l2v2_8way_ctx.keccak );
|
||||
cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 );
|
||||
skein256_4way_init( &l2v2_8way_ctx.skein );
|
||||
bmw256_8way_init( &l2v2_8way_ctx.bmw );
|
||||
return true;
|
||||
}
|
||||
@@ -31,8 +235,6 @@ bool init_lyra2rev2_8way_ctx()
|
||||
void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhashB[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
@@ -47,103 +249,113 @@ void lyra2rev2_8way_hash( void *state, const void *input )
|
||||
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
|
||||
blake256_8way_close( &ctx.blake, vhash );
|
||||
|
||||
rintrlv_8x32_8x64( vhashA, vhash, 256 );
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
keccak256_4way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
|
||||
keccak256_4way_init( &ctx.keccak );
|
||||
keccak256_4way_update( &ctx.keccak, vhash, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash );
|
||||
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 256 );
|
||||
|
||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_8way_close( &ctx.skein, vhash );
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
|
||||
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash4, 32, hash4, 32, hash4, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash5, 32, hash5, 32, hash5, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
|
||||
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
skein256_4way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
|
||||
skein256_4way_init( &ctx.skein );
|
||||
skein256_4way_update( &ctx.skein, vhash, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash );
|
||||
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 256 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
bmw256_8way_update( &ctx.bmw, vhash, 32 );
|
||||
bmw256_8way_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hashd7 = &hash[7*8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id;
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
if ( bench ) ptarget[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
blake256_8way_init( &l2v2_8way_ctx.blake );
|
||||
blake256_8way_update( &l2v2_8way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2rev2_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||
n += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
#elif defined (LYRA2REV2_4WAY)
|
||||
|
||||
typedef struct {
|
||||
@@ -226,15 +438,16 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *hashd7 = &(hash[7<<2]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
@@ -249,21 +462,22 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2rev2_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
*/
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "lyra2-gate.h"
|
||||
|
||||
#if !( defined(LYRA2REV2_16WAY) || defined(LYRA2REV2_8WAY) || defined(LYRA2REV2_4WAY) )
|
||||
|
||||
#include <memory.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/cubehash/sph_cubehash.h"
|
||||
@@ -96,7 +99,7 @@ int scanhash_lyra2rev2( struct work *work,
|
||||
lyra2rev2_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
if( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
if( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
@@ -107,4 +110,4 @@ int scanhash_lyra2rev2( struct work *work,
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -79,19 +79,16 @@ void lyra2rev3_16way_hash( void *state, const void *input )
|
||||
dintrlv_2x256( hash14, hash15, vhash, 256 );
|
||||
|
||||
intrlv_4x128( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 32 );
|
||||
cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 );
|
||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
intrlv_4x128( vhash, hash4, hash5, hash6, hash7, 256 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 32 );
|
||||
cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 );
|
||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
intrlv_4x128( vhash, hash8, hash9, hash10, hash11, 256 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 32 );
|
||||
cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 );
|
||||
dintrlv_4x128( hash8, hash9, hash10, hash11, vhash, 256 );
|
||||
intrlv_4x128( vhash, hash12, hash13, hash14, hash15, 256 );
|
||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
||||
cube_4way_update_close( &ctx.cube, vhash, vhash, 32 );
|
||||
cube_4way_full( &ctx.cube, vhash, 256, vhash, 32 );
|
||||
dintrlv_4x128( hash12, hash13, hash14, hash15, vhash, 256 );
|
||||
|
||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||
@@ -133,15 +130,15 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &hash[7<<4];
|
||||
uint32_t *hashd7 = &hash[7*16];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m512i *noncev = (__m512i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
@@ -162,17 +159,18 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
||||
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||
{
|
||||
extr_lane_16x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 16;
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
@@ -197,7 +195,7 @@ bool init_lyra2rev3_8way_ctx()
|
||||
|
||||
void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
@@ -224,21 +222,14 @@ void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 );
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash1, (const byte*) hash1, 32 );
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash2, (const byte*) hash2, 32 );
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash4, (const byte*) hash4, 32 );
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash5, (const byte*) hash5, 32 );
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash6, (const byte*) hash6, 32 );
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash7, (const byte*) hash7, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
|
||||
@@ -260,46 +251,47 @@ void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &hash[7<<3];
|
||||
uint32_t *hashd7 = &hash[7*8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
if ( bench ) ptarget[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
blake256_8way_init( &l2v3_8way_ctx.blake );
|
||||
blake256_8way_update( &l2v3_8way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2rev3_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
||||
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||
n += 8;
|
||||
} while ( likely( (n < max_nonce-8) && !work_restart[thr_id].restart ) );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -366,42 +358,41 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *hashd7 = &(hash[7*4]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
const int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t targ32 = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
*noncev = _mm_set_epi32( n+3, n+2, n+1, n );
|
||||
|
||||
blake256_4way_init( &l2v3_4way_ctx.blake );
|
||||
blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do
|
||||
{
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2rev3_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "lyra2-gate.h"
|
||||
|
||||
#if !( defined(LYRA2REV3_16WAY) || defined(LYRA2REV3_8WAY) || defined(LYRA2REV3_4WAY) )
|
||||
|
||||
#include <memory.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/cubehash/sph_cubehash.h"
|
||||
@@ -85,7 +88,7 @@ int scanhash_lyra2rev3( struct work *work,
|
||||
lyra2rev3_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
if( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
if( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
@@ -96,4 +99,4 @@ int scanhash_lyra2rev3( struct work *work,
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -97,41 +97,42 @@ void lyra2z_16way_hash( void *state, const void *input )
|
||||
int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||
uint64_t hash[4*16] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
if ( bench ) ptarget[7] = 0x0000ff;
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
|
||||
lyra2z_16way_midstate( vdata );
|
||||
|
||||
do {
|
||||
*noncev = mm512_bswap_32( _mm512_set_epi32( n+15, n+14, n+13, n+12,
|
||||
n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4,
|
||||
n+ 3, n+ 2, n+ 1, n ) );
|
||||
lyra2z_16way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
const uint64_t *lane_hash = hash + (lane<<2);
|
||||
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||
n += 16;
|
||||
} while ( (n < max_nonce-16) && !work_restart[thr_id].restart);
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -195,39 +196,40 @@ void lyra2z_8way_hash( void *state, const void *input )
|
||||
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
if ( bench ) ptarget[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||
lyra2z_8way_midstate( vdata );
|
||||
|
||||
do {
|
||||
*noncev = mm256_bswap_32(
|
||||
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
|
||||
lyra2z_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
const uint64_t *lane_hash = hash + (lane<<2);
|
||||
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -274,39 +276,40 @@ void lyra2z_4way_hash( void *state, const void *input )
|
||||
int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash[4*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
if ( bench ) ptarget[7] = 0x0000ff;
|
||||
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
*noncev = _mm_set_epi32( n+3, n+2, n+1, n );
|
||||
lyra2z_4way_midstate( vdata );
|
||||
|
||||
do {
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
lyra2z_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
const uint64_t *lane_hash = hash + (lane<<2);
|
||||
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -1,6 +1,9 @@
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
#include "lyra2-gate.h"
|
||||
|
||||
#if !( defined(LYRA2Z_16WAY) || defined(LYRA2Z_8WAY) || defined(LYRA2Z_4WAY) )
|
||||
|
||||
#include "lyra2.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "simd-utils.h"
|
||||
@@ -53,7 +56,7 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id;
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
@@ -62,14 +65,13 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
}
|
||||
|
||||
lyra2z_midstate( endiandata );
|
||||
lyra2z_midstate( endiandata );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
lyra2z_hash( hash, endiandata );
|
||||
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
@@ -80,4 +82,4 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -9,7 +9,7 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
|
||||
{
|
||||
uint32_t _ALIGN(256) hash[16];
|
||||
|
||||
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
|
||||
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, input, 80, input, 80,
|
||||
2, 330, 256 );
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
@@ -18,38 +18,40 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
|
||||
int scanhash_lyra2z330( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8] __attribute__ ((aligned (128)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
do
|
||||
{
|
||||
be32enc( &endiandata[19], nonce );
|
||||
lyra2z330_hash( hash, endiandata, work->height );
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
edata[19] = nonce;
|
||||
|
||||
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, edata, 80, edata, 80,
|
||||
2, 330, 256 );
|
||||
|
||||
// lyra2z330_hash( hash, edata, work->height );
|
||||
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
be32enc( pdata + 19, nonce );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
*hashes_done = nonce - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -66,7 +68,7 @@ bool lyra2z330_thread_init()
|
||||
|
||||
bool register_lyra2z330_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->miner_thread_init = (void*)&lyra2z330_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z330;
|
||||
gate->hash = (void*)&lyra2z330_hash;
|
||||
|
@@ -1,233 +1,501 @@
|
||||
/**
|
||||
* Phi-2 algo Implementation
|
||||
*/
|
||||
|
||||
#include "lyra2-gate.h"
|
||||
|
||||
#if defined(PHI2_4WAY)
|
||||
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "lyra2.h"
|
||||
#if defined(__VAES__)
|
||||
#include "algo/echo/echo-hash-4way.h"
|
||||
#elif defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
|
||||
#if defined(PHI2_8WAY)
|
||||
|
||||
typedef struct {
|
||||
cubehashParam cube;
|
||||
jh512_8way_context jh;
|
||||
#if defined(__VAES__)
|
||||
echo_4way_context echo;
|
||||
#else
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
sph_gost512_context gost;
|
||||
skein512_8way_context skein;
|
||||
} phi2_8way_ctx_holder;
|
||||
|
||||
void phi2_8way_hash( void *state, const void *input )
|
||||
{
|
||||
unsigned char _ALIGN(128) hash[64*8];
|
||||
unsigned char _ALIGN(128) hashA[64*2];
|
||||
unsigned char _ALIGN(64) hash0[64];
|
||||
unsigned char _ALIGN(64) hash1[64];
|
||||
unsigned char _ALIGN(64) hash2[64];
|
||||
unsigned char _ALIGN(64) hash3[64];
|
||||
unsigned char _ALIGN(64) hash4[64];
|
||||
unsigned char _ALIGN(64) hash5[64];
|
||||
unsigned char _ALIGN(64) hash6[64];
|
||||
unsigned char _ALIGN(64) hash7[64];
|
||||
const int size = phi2_has_roots ? 144 : 80 ;
|
||||
phi2_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*)hash0, 512,
|
||||
(const byte*)input, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash1, 512,
|
||||
(const byte*)input + 144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash2, 512,
|
||||
(const byte*)input + 2*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash3, 512,
|
||||
(const byte*)input + 3*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash4, 512,
|
||||
(const byte*)input + 4*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash5, 512,
|
||||
(const byte*)input + 5*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash6, 512,
|
||||
(const byte*)input + 6*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash7, 512,
|
||||
(const byte*)input + 7*144, size );
|
||||
|
||||
intrlv_2x256( hashA, hash0, hash1, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash0, hash1, hash, 512 );
|
||||
intrlv_2x256( hashA, hash2, hash3, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash2, hash3, hash, 512 );
|
||||
intrlv_2x256( hashA, hash4, hash5, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash4, hash5, hash, 512 );
|
||||
intrlv_2x256( hashA, hash6, hash7, 512 );
|
||||
LYRA2RE_2WAY( hash, 32, hashA, 32, 1, 8, 8 );
|
||||
LYRA2RE_2WAY( hash + 2*32, 32, hashA + 2*32, 32, 1, 8, 8 );
|
||||
dintrlv_2x256( hash6, hash7, hash, 512 );
|
||||
|
||||
intrlv_8x64_512( hash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, (const void*)hash, 64 );
|
||||
jh512_8way_close( &ctx.jh, (void*)hash );
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, hash );
|
||||
|
||||
#if defined (__VAES__)
|
||||
|
||||
unsigned char _ALIGN(64) hashA0[64];
|
||||
unsigned char _ALIGN(64) hashA1[64];
|
||||
unsigned char _ALIGN(64) hashA2[64];
|
||||
unsigned char _ALIGN(64) hashA3[64];
|
||||
unsigned char _ALIGN(64) hashA4[64];
|
||||
unsigned char _ALIGN(64) hashA5[64];
|
||||
unsigned char _ALIGN(64) hashA6[64];
|
||||
unsigned char _ALIGN(64) hashA7[64];
|
||||
|
||||
intrlv_4x128_512( hash, hash0, hash1, hash2, hash3 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
dintrlv_4x128_512( hashA0, hashA1, hashA2, hashA3, hash );
|
||||
|
||||
intrlv_4x128_512( hash, hash4, hash5, hash6, hash7 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
echo_4way_full( &ctx.echo, hash, 512, hash, 64 );
|
||||
dintrlv_4x128_512( hashA4, hashA5, hashA6, hashA7, hash );
|
||||
|
||||
#endif
|
||||
|
||||
if ( hash0[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash0 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash0, hashA0, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash1[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash1, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash1 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash1, hashA1, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash2[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash2, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash2 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash2, hashA2, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash3[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash3 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash3, hashA3, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash4[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash4, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash4 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash4, hashA4, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)hash4, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash4, 512,
|
||||
(const BitSequence *)hash4, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash5[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash5, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash5 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash5, hashA5, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)hash5, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash5, 512,
|
||||
(const BitSequence *)hash5, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash6[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash6, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash6 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash6, hashA6, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)hash6, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash6, 512,
|
||||
(const BitSequence *)hash6, 64 );
|
||||
}
|
||||
#endif
|
||||
if ( hash7[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash7, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash7 );
|
||||
}
|
||||
else
|
||||
#if defined (__VAES__)
|
||||
memcpy( hash7, hashA7, 64 );
|
||||
#else
|
||||
{
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)hash7, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash7, 512,
|
||||
(const BitSequence *)hash7, 64 );
|
||||
}
|
||||
#endif
|
||||
|
||||
intrlv_8x64_512( hash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, (const void*)hash, 64 );
|
||||
skein512_8way_close( &ctx.skein, (void*)hash );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
casti_m512i( state, i ) = _mm512_xor_si512( casti_m512i( hash, i ),
|
||||
casti_m512i( hash, i+4 ) );
|
||||
}
|
||||
|
||||
int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[16*8];
|
||||
uint32_t _ALIGN(128) edata[36*8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *hash7 = &(hash[49]);
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
if ( bench ) ptarget[7] = 0x00ff;
|
||||
|
||||
phi2_has_roots = false;
|
||||
|
||||
for ( int i = 0; i < 36; i++ )
|
||||
{
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
edata[ i + 36 ] = edata[ i + 2*36 ] = edata[ i + 3*36 ] =
|
||||
edata[ i + 4*36 ] = edata[ i + 5*36 ] = edata[ i + 6*36 ] =
|
||||
edata[ i + 7*36 ] = edata[ i ];
|
||||
if ( i >= 20 && pdata[i] ) phi2_has_roots = true;
|
||||
}
|
||||
|
||||
edata[ 19 ] = n;
|
||||
edata[ 36 + 19 ] = n+1;
|
||||
edata[ 2*36 + 19 ] = n+2;
|
||||
edata[ 3*36 + 19 ] = n+3;
|
||||
edata[ 4*36 + 19 ] = n+4;
|
||||
edata[ 5*36 + 19 ] = n+5;
|
||||
edata[ 6*36 + 19 ] = n+6;
|
||||
edata[ 7*36 + 19 ] = n+7;
|
||||
|
||||
do {
|
||||
phi2_8way_hash( hash, edata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )
|
||||
{
|
||||
uint64_t _ALIGN(64) lane_hash[8];
|
||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
be32enc( pdata + 19, n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
edata[ 19 ] += 8;
|
||||
edata[ 36 + 19 ] += 8;
|
||||
edata[ 2*36 + 19 ] += 8;
|
||||
edata[ 3*36 + 19 ] += 8;
|
||||
edata[ 4*36 + 19 ] += 8;
|
||||
edata[ 5*36 + 19 ] += 8;
|
||||
edata[ 6*36 + 19 ] += 8;
|
||||
edata[ 7*36 + 19 ] += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
#elif defined(PHI2_4WAY)
|
||||
|
||||
typedef struct {
|
||||
cubehashParam cube;
|
||||
jh512_4way_context jh;
|
||||
#if defined(__AES__)
|
||||
hashState_echo echo;
|
||||
// hashState_echo echo2;
|
||||
#else
|
||||
sph_echo512_context echo;
|
||||
#endif
|
||||
sph_gost512_context gost;
|
||||
skein512_4way_context skein;
|
||||
} phi2_ctx_holder;
|
||||
/*
|
||||
phi2_ctx_holder phi2_ctx;
|
||||
} phi2_4way_ctx_holder;
|
||||
|
||||
void init_phi2_ctx()
|
||||
phi2_4way_ctx_holder phi2_4way_ctx;
|
||||
|
||||
void phi2_4way_hash(void *state, const void *input)
|
||||
{
|
||||
cubehashInit( &phi2_ctx.cube, 512, 16, 32 );
|
||||
sph_jh512_init(&phi2_ctx.jh);
|
||||
init_echo( &phi2_ctx.echo1, 512 );
|
||||
init_echo( &phi2_ctx.echo2, 512 );
|
||||
sph_gost512_init(&phi2_ctx.gost);
|
||||
sph_skein512_init(&phi2_ctx.skein);
|
||||
};
|
||||
*/
|
||||
void phi2_hash_4way( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash[4][16] __attribute__ ((aligned (64)));
|
||||
uint32_t hashA[4][16] __attribute__ ((aligned (64)));
|
||||
uint32_t hashB[4][16] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[4*16] __attribute__ ((aligned (64)));
|
||||
unsigned char _ALIGN(128) hash[64*4];
|
||||
unsigned char _ALIGN(64) hash0[64];
|
||||
unsigned char _ALIGN(64) hash1[64];
|
||||
unsigned char _ALIGN(64) hash2[64];
|
||||
unsigned char _ALIGN(64) hash3[64];
|
||||
unsigned char _ALIGN(64) hash0A[64];
|
||||
unsigned char _ALIGN(64) hash1A[64];
|
||||
unsigned char _ALIGN(64) hash2A[64];
|
||||
unsigned char _ALIGN(64) hash3A[64];
|
||||
const int size = phi2_has_roots ? 144 : 80 ;
|
||||
phi2_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
// unsigned char _ALIGN(128) hash[64];
|
||||
// unsigned char _ALIGN(128) hashA[64];
|
||||
// unsigned char _ALIGN(128) hashB[64];
|
||||
cubehash_full( &ctx.cube, (byte*)hash0A, 512,
|
||||
(const byte*)input, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash1A, 512,
|
||||
(const byte*)input + 144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash2A, 512,
|
||||
(const byte*)input + 2*144, size );
|
||||
cubehash_full( &ctx.cube, (byte*)hash3A, 512,
|
||||
(const byte*)input + 3*144, size );
|
||||
|
||||
LYRA2RE( &hash0[ 0], 32, hash0A, 32, hash0A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash0[32], 32, hash0A+32, 32, hash0A+32, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash1[ 0], 32, hash1A, 32, hash1A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash1[32], 32, hash1A+32, 32, hash1A+32, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash2[ 0], 32, hash2A, 32, hash2A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash2[32], 32, hash2A+32, 32, hash2A+32, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash3[ 0], 32, hash3A, 32, hash3A, 32, 1, 8, 8 );
|
||||
LYRA2RE( &hash3[32], 32, hash3A+32, 32, hash3A+32, 32, 1, 8, 8 );
|
||||
|
||||
phi2_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
// memcpy( &ctx, &phi2_ctx, sizeof(phi2_ctx) );
|
||||
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[0], (const byte*)input,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[1], (const byte*)input+144,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[2], (const byte*)input+288,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
cubehashInit( &ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hashB[3], (const byte*)input+432,
|
||||
phi2_has_roots ? 144 : 80 );
|
||||
|
||||
LYRA2RE( &hashA[0][0], 32, &hashB[0][0], 32, &hashB[0][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[0][8], 32, &hashB[0][8], 32, &hashB[0][8], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[1][0], 32, &hashB[1][0], 32, &hashB[1][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[1][8], 32, &hashB[1][8], 32, &hashB[1][8], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[2][0], 32, &hashB[2][0], 32, &hashB[2][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[2][8], 32, &hashB[2][8], 32, &hashB[2][8], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[3][0], 32, &hashB[3][0], 32, &hashB[3][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[3][8], 32, &hashB[3][8], 32, &hashB[3][8], 32, 1, 8, 8 );
|
||||
|
||||
intrlv_4x64( vhash, hashA[0], hashA[1], hashA[2], hashA[3], 512 );
|
||||
intrlv_4x64_512( hash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
jh512_4way_update( &ctx.jh, (const void*)hash, 64 );
|
||||
jh512_4way_close( &ctx.jh, (void*)hash );
|
||||
|
||||
dintrlv_4x64( hash[0], hash[1], hash[2], hash[3], vhash, 512 );
|
||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, hash );
|
||||
|
||||
if ( hash[0][0] & 1 )
|
||||
if ( hash0[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[0], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[0] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash0, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
|
||||
(const BitSequence *)hash[0], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[0],
|
||||
(const BitSequence *)hash[0], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
|
||||
(const BitSequence *)hash0, 64 );
|
||||
}
|
||||
|
||||
if ( hash[1][0] & 1 )
|
||||
if ( hash1[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[1], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[1] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash1, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
|
||||
(const BitSequence *)hash[1], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[1],
|
||||
(const BitSequence *)hash[1], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
|
||||
(const BitSequence *)hash1, 64 );
|
||||
}
|
||||
|
||||
if ( hash[2][0] & 1 )
|
||||
if ( hash2[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[2], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[2] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash2, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash2 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
|
||||
(const BitSequence *)hash[2], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[2],
|
||||
(const BitSequence *)hash[2], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
|
||||
(const BitSequence *)hash2, 64 );
|
||||
}
|
||||
|
||||
if ( hash[3][0] & 1 )
|
||||
if ( hash3[0] & 1 )
|
||||
{
|
||||
sph_gost512_init( &ctx.gost );
|
||||
sph_gost512( &ctx.gost, (const void*)hash[3], 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash[3] );
|
||||
sph_gost512( &ctx.gost, (const void*)hash3, 64 );
|
||||
sph_gost512_close( &ctx.gost, (void*)hash3 );
|
||||
}
|
||||
else
|
||||
{
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
|
||||
(const BitSequence *)hash[3], 512 );
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo ( &ctx.echo, (BitSequence *)hash[3],
|
||||
(const BitSequence *)hash[3], 512 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
}
|
||||
|
||||
intrlv_4x64( vhash, hash[0], hash[1], hash[2], hash[3], 512 );
|
||||
|
||||
intrlv_4x64_512( hash, hash0, hash1, hash2, hash3 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_update( &ctx.skein, (const void*)hash, 64 );
|
||||
skein512_4way_close( &ctx.skein, (void*)hash );
|
||||
|
||||
for (int i=0; i<4; i++)
|
||||
{
|
||||
( (uint64_t*)vhash )[i] ^= ( (uint64_t*)vhash )[i+4];
|
||||
( (uint64_t*)vhash+ 8 )[i] ^= ( (uint64_t*)vhash+ 8 )[i+4];
|
||||
( (uint64_t*)vhash+16 )[i] ^= ( (uint64_t*)vhash+16 )[i+4];
|
||||
( (uint64_t*)vhash+24 )[i] ^= ( (uint64_t*)vhash+24 )[i+4];
|
||||
}
|
||||
// for ( int i = 0; i < 4; i++ )
|
||||
// casti_m256i( vhash, i ) = _mm256_xor_si256( casti_m256i( vhash, i ),
|
||||
// casti_m256i( vhash, i+4 ) );
|
||||
|
||||
memcpy( state, vhash, 128 );
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
casti_m256i( state, i ) = _mm256_xor_si256( casti_m256i( hash, i ),
|
||||
casti_m256i( hash, i+4 ) );
|
||||
}
|
||||
|
||||
int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) edata[36];
|
||||
uint32_t vdata[4][36] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[25]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t _ALIGN(128) hash[16*4];
|
||||
uint32_t _ALIGN(128) edata[36*4];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *hash7 = &(hash[25]); // 3*8+1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if(opt_benchmark){
|
||||
ptarget[7] = 0x00ff;
|
||||
}
|
||||
|
||||
// Data is not interleaved, but hash is.
|
||||
// any non-zero data at index 20 or above sets roots true.
|
||||
// Split up the operations, bswap first, then set roots.
|
||||
|
||||
phi2_has_roots = false;
|
||||
for ( int i=0; i < 36; i++ )
|
||||
{
|
||||
be32enc(&edata[i], pdata[i]);
|
||||
if (i >= 20 && pdata[i]) phi2_has_roots = true;
|
||||
}
|
||||
/*
|
||||
casti_m256i( vdata[0], 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( vdata[0], 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m256i( vdata[0], 2 ) = mm256_bswap_32( casti_m256i( pdata, 2 ) );
|
||||
casti_m256i( vdata[0], 3 ) = mm256_bswap_32( casti_m256i( pdata, 3 ) );
|
||||
casti_m128i( vdata[0], 8 ) = mm128_bswap_32( casti_m128i( pdata, 8 ) );
|
||||
phi2_has_roots = mm128_anybits1( casti_m128i( vdata[0], 5 ) ) ||
|
||||
mm128_anybits1( casti_m128i( vdata[0], 6 ) ) ||
|
||||
mm128_anybits1( casti_m128i( vdata[0], 7 ) ) ||
|
||||
mm128_anybits1( casti_m128i( vdata[0], 8 ) );
|
||||
*/
|
||||
|
||||
memcpy( vdata[0], edata, 144 );
|
||||
memcpy( vdata[1], edata, 144 );
|
||||
memcpy( vdata[2], edata, 144 );
|
||||
memcpy( vdata[3], edata, 144 );
|
||||
|
||||
do {
|
||||
be32enc( &vdata[0][19], n );
|
||||
be32enc( &vdata[1][19], n+1 );
|
||||
be32enc( &vdata[2][19], n+2 );
|
||||
be32enc( &vdata[3][19], n+3 );
|
||||
|
||||
phi2_hash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[ lane<<1 ] < Htarg )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
if ( bench ) ptarget[7] = 0x00ff;
|
||||
|
||||
#endif // PHI2_4WAY
|
||||
phi2_has_roots = false;
|
||||
|
||||
for ( int i = 0; i < 36; i++ )
|
||||
{
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
edata[ i+36 ] = edata[ i+72 ] = edata[ i+108 ] = edata[i];
|
||||
if ( i >= 20 && pdata[i] ) phi2_has_roots = true;
|
||||
}
|
||||
|
||||
edata[ 19 ] = n;
|
||||
edata[ 36 + 19 ] = n+1;
|
||||
edata[ 2*36 + 19 ] = n+2;
|
||||
edata[ 3*36 + 19 ] = n+3;
|
||||
|
||||
do {
|
||||
phi2_4way_hash( hash, edata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hash7[ lane<<1 ] <= Htarg && !bench ) )
|
||||
{
|
||||
uint64_t _ALIGN(64) lane_hash[8];
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
be32enc( pdata + 19, n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
edata[ 19 ] += 4;
|
||||
edata[ 36 + 19 ] += 4;
|
||||
edata[ 2*36 + 19 ] += 4;
|
||||
edata[ 3*36 + 19 ] += 4;
|
||||
n +=4;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -96,32 +96,29 @@ int scanhash_phi2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) endiandata[36];
|
||||
uint32_t _ALIGN(128) edata[36];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if(opt_benchmark){
|
||||
ptarget[7] = 0x00ff;
|
||||
}
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
if( bench ) ptarget[7] = 0x00ff;
|
||||
|
||||
phi2_has_roots = false;
|
||||
for ( int i=0; i < 36; i++ )
|
||||
|
||||
for ( int i = 0; i < 36; i++ )
|
||||
{
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
if ( i >= 20 && pdata[i] ) phi2_has_roots = true;
|
||||
}
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], n );
|
||||
phi2_hash( hash, endiandata );
|
||||
if ( hash[7] < Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
edata[19] = n;
|
||||
phi2_hash( hash, edata );
|
||||
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
be32enc( pdata+19, n );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
n++;
|
||||
|
@@ -89,6 +89,9 @@ inline void initState( uint64_t State[/*16*/] )
|
||||
*
|
||||
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
|
||||
*/
|
||||
|
||||
#if !defined(__AVX512F__) && !defined(__AVX2__) && !defined(__SSE2__)
|
||||
|
||||
inline static void blake2bLyra( uint64_t *v )
|
||||
{
|
||||
ROUND_LYRA(0);
|
||||
@@ -114,6 +117,8 @@ inline static void reducedBlake2bLyra( uint64_t *v )
|
||||
ROUND_LYRA(0);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Performs a squeeze operation, using Blake2b's G function as the
|
||||
* internal permutation
|
||||
|
@@ -171,7 +171,6 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7)
|
||||
|
||||
|
||||
#endif // AVX2 else SSE2
|
||||
|
||||
// Scalar
|
||||
@@ -200,7 +199,6 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
||||
G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
|
||||
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
union _ovly_512
|
||||
|
@@ -311,7 +311,7 @@ bool register_m7m_algo( algo_gate_t *gate )
|
||||
{
|
||||
gate->optimizations = SHA_OPT;
|
||||
init_m7m_ctx();
|
||||
gate->scanhash = (void*)scanhash_m7m_hash;
|
||||
gate->scanhash = (void*)&scanhash_m7m_hash;
|
||||
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
@@ -108,7 +108,7 @@ int scanhash_nist5_8way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
@@ -196,7 +196,7 @@ int scanhash_nist5_4way( struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "nist5-gate.h"
|
||||
|
||||
#if !defined(NIST5_8WAY) && !defined(NIST5_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -105,13 +108,4 @@ int scanhash_nist5( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
bool register_nist5_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
init_nist5_ctx();
|
||||
gate->scanhash = (void*)&scanhash_nist5;
|
||||
gate->hash = (void*)&nist5hash;
|
||||
return true;
|
||||
};
|
||||
*/
|
||||
#endif
|
||||
|
@@ -158,7 +158,7 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
{
|
||||
// ignore POK in first word
|
||||
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
|
||||
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
|
||||
uint32_t *nonceptr = work->data + algo_gate.nonce_index;
|
||||
if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
|
||||
|| ( *nonceptr >= *end_nonce_ptr ) )
|
||||
{
|
||||
|
@@ -1,18 +1,241 @@
|
||||
#include "cpuminer-config.h"
|
||||
#include "anime-gate.h"
|
||||
|
||||
#if defined (ANIME_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/bmw/bmw-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#if defined(__VAES__)
|
||||
#include "algo/groestl/groestl512-hash-4way.h"
|
||||
#endif
|
||||
|
||||
#if defined (ANIME_8WAY)
|
||||
|
||||
typedef struct {
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
#endif
|
||||
jh512_8way_context jh;
|
||||
skein512_8way_context skein;
|
||||
keccak512_8way_context keccak;
|
||||
} anime_8way_ctx_holder;
|
||||
|
||||
anime_8way_ctx_holder anime_8way_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_anime_8way_ctx()
|
||||
{
|
||||
blake512_8way_init( &anime_8way_ctx.blake );
|
||||
bmw512_8way_init( &anime_8way_ctx.bmw );
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_init( &anime_8way_ctx.groestl, 64 );
|
||||
#else
|
||||
init_groestl( &anime_8way_ctx.groestl, 64 );
|
||||
#endif
|
||||
skein512_8way_init( &anime_8way_ctx.skein );
|
||||
jh512_8way_init( &anime_8way_ctx.jh );
|
||||
keccak512_8way_init( &anime_8way_ctx.keccak );
|
||||
}
|
||||
|
||||
void anime_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint64_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||
uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[8*8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashC[8*8] __attribute__ ((aligned (64)));
|
||||
#if !defined(__VAES__)
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash4[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash5[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash6[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash7[8] __attribute__ ((aligned (64)));
|
||||
#endif
|
||||
__m512i* vh = (__m512i*)vhash;
|
||||
__m512i* vhA = (__m512i*)vhashA;
|
||||
__m512i* vhB = (__m512i*)vhashB;
|
||||
__m512i* vhC = (__m512i*)vhashC;
|
||||
const __m512i bit3_mask = m512_const1_64( 8 );
|
||||
const __m512i zero = _mm512_setzero_si512();
|
||||
__mmask8 vh_mask;
|
||||
anime_8way_ctx_holder ctx;
|
||||
memcpy( &ctx, &anime_8way_ctx, sizeof(anime_8way_ctx) );
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, input, 80 );
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||
zero );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
if ( ( vh_mask & 0x0f ) != 0x0f )
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
|
||||
if ( hash0[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
if ( hash1[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
if ( hash2[0] & 8)
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
if ( hash3[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
if ( hash4[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
if ( hash5[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
if ( hash6[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
if ( hash7[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64_512( vhashC, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
#endif
|
||||
|
||||
if ( vh_mask & 0xff )
|
||||
skein512_8way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhC, vhB, vh_mask );
|
||||
|
||||
#if defined(__VAES__)
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash );
|
||||
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7 );
|
||||
|
||||
#endif
|
||||
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhash );
|
||||
|
||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||
zero );
|
||||
|
||||
if ( ( vh_mask & 0xff ) != 0xff )
|
||||
blake512_8way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
if ( vh_mask & 0xff )
|
||||
bmw512_8way_full( &ctx.bmw, vhashB, vhash, 64 );
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||
zero );
|
||||
|
||||
if ( ( vh_mask & 0xff ) != 0xff )
|
||||
{
|
||||
keccak512_8way_init( &ctx.keccak );
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhashA );
|
||||
}
|
||||
if ( vh_mask & 0xff )
|
||||
{
|
||||
jh512_8way_init( &ctx.jh );
|
||||
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_8way_close( &ctx.jh, vhashB );
|
||||
}
|
||||
|
||||
casti_m512i( state,0 ) = _mm512_mask_blend_epi64( vh_mask, vhA[0], vhB[0] );
|
||||
casti_m512i( state,1 ) = _mm512_mask_blend_epi64( vh_mask, vhA[1], vhB[1] );
|
||||
casti_m512i( state,2 ) = _mm512_mask_blend_epi64( vh_mask, vhA[2], vhB[2] );
|
||||
casti_m512i( state,3 ) = _mm512_mask_blend_epi64( vh_mask, vhA[3], vhB[3] );
|
||||
}
|
||||
|
||||
int scanhash_anime_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint64_t hash64[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint64_t *hash64_q3 = &(hash64[3*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
|
||||
do
|
||||
{
|
||||
anime_8way_hash( hash64, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
|
||||
{
|
||||
extr_lane_8x64( lane_hash, hash64, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined (ANIME_4WAY)
|
||||
|
||||
typedef struct {
|
||||
blake512_4way_context blake;
|
||||
@@ -23,18 +246,6 @@ typedef struct {
|
||||
keccak512_4way_context keccak;
|
||||
} anime_4way_ctx_holder;
|
||||
|
||||
anime_4way_ctx_holder anime_4way_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_anime_4way_ctx()
|
||||
{
|
||||
blake512_4way_init( &anime_4way_ctx.blake );
|
||||
bmw512_4way_init( &anime_4way_ctx.bmw );
|
||||
init_groestl( &anime_4way_ctx.groestl, 64 );
|
||||
skein512_4way_init( &anime_4way_ctx.skein );
|
||||
jh512_4way_init( &anime_4way_ctx.jh );
|
||||
keccak512_4way_init( &anime_4way_ctx.keccak );
|
||||
}
|
||||
|
||||
void anime_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
@@ -48,81 +259,61 @@ void anime_4way_hash( void *state, const void *input )
|
||||
__m256i* vhA = (__m256i*)vhashA;
|
||||
__m256i* vhB = (__m256i*)vhashB;
|
||||
__m256i vh_mask;
|
||||
const uint32_t mask = 8;
|
||||
int h_mask;
|
||||
const __m256i bit3_mask = m256_const1_64( 8 );
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
anime_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &anime_4way_ctx, sizeof(anime_4way_ctx) );
|
||||
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, input, 80 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
blake512_4way_update( &ctx.blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
blake512_4way_full( &ctx.blake, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
if ( hash0[0] & mask )
|
||||
{
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
}
|
||||
if ( hash1[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(char*)hash1, 512 );
|
||||
}
|
||||
if ( hash2[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(char*)hash2, 512 );
|
||||
}
|
||||
if ( hash3[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
}
|
||||
// A
|
||||
if ( hash0[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
if ( hash1[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
if ( hash2[0] & 8)
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
if ( hash3[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
}
|
||||
// B
|
||||
if ( h_mask & 0xffffffff )
|
||||
skein512_4way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx.blake, vhashA );
|
||||
}
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
// A
|
||||
if ( ( h_mask & 0xffffffff ) != 0xffffffff )
|
||||
blake512_4way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
// B
|
||||
if ( h_mask & 0xffffffff )
|
||||
{
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
@@ -131,90 +322,76 @@ void anime_4way_hash( void *state, const void *input )
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
// A
|
||||
if ( ( h_mask & 0xffffffff ) != 0xffffffff )
|
||||
{
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||
}
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
// B
|
||||
if ( h_mask & 0xffffffff )
|
||||
{
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhashB );
|
||||
}
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
||||
casti_m256i( state, 0 ) = _mm256_blendv_epi8( vhA[0], vhB[0], vh_mask );
|
||||
casti_m256i( state, 1 ) = _mm256_blendv_epi8( vhA[1], vhB[1], vh_mask );
|
||||
casti_m256i( state, 2 ) = _mm256_blendv_epi8( vhA[2], vhB[2], vh_mask );
|
||||
casti_m256i( state, 3 ) = _mm256_blendv_epi8( vhA[3], vhB[3], vh_mask );
|
||||
}
|
||||
|
||||
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash64[4*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint64_t *hash64_q3 = &(hash64[3*4]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
anime_4way_hash( hash64, vdata );
|
||||
|
||||
for (int m=0; m < 6; m++)
|
||||
if (Htarg <= htmax[m])
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
|
||||
do
|
||||
extr_lane_4x64( lane_hash, hash64, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
anime_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -2,8 +2,10 @@
|
||||
|
||||
bool register_anime_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (ANIME_4WAY)
|
||||
init_anime_4way_ctx();
|
||||
#if defined (ANIME_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_anime_8way;
|
||||
gate->hash = (void*)&anime_8way_hash;
|
||||
#elif defined (ANIME_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_anime_4way;
|
||||
gate->hash = (void*)&anime_4way_hash;
|
||||
#else
|
||||
@@ -11,7 +13,7 @@ bool register_anime_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_anime;
|
||||
gate->hash = (void*)&anime_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -4,18 +4,25 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define ANIME_4WAY
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define ANIME_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define ANIME_4WAY 1
|
||||
#endif
|
||||
|
||||
bool register_anime_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(ANIME_4WAY)
|
||||
#if defined(ANIME_8WAY)
|
||||
|
||||
void anime_8way_hash( void *state, const void *input );
|
||||
int scanhash_anime_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(ANIME_4WAY)
|
||||
|
||||
void anime_4way_hash( void *state, const void *input );
|
||||
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_anime_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -1,5 +1,8 @@
|
||||
#include "cpuminer-config.h"
|
||||
#include "anime-gate.h"
|
||||
|
||||
#if !defined(ANIME_8WAY) && !defined(ANIME_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
@@ -123,50 +126,29 @@ int scanhash_anime( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t edata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
const int thr_id = mythr->id;
|
||||
const int bench = opt_benchmark;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
for (int m=0; m < 6; m++)
|
||||
if (Htarg <= htmax[m])
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do
|
||||
{
|
||||
be32enc( &endiandata[19], n );
|
||||
anime_hash( hash, endiandata );
|
||||
pdata[19] = n;
|
||||
|
||||
if ( ( hash[7] & mask ) == 0 && fulltest( hash, ptarget ) )
|
||||
submit_solution( work, hash, mythr );
|
||||
n++;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
do
|
||||
{
|
||||
edata[19] = n;
|
||||
anime_hash( hash, edata );
|
||||
if ( valid_hash( hash, ptarget ) && !bench )
|
||||
{
|
||||
be32enc( &pdata[19], n );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,7 @@
|
||||
#include "hmq1725-gate.h"
|
||||
|
||||
#if !defined(HMQ1725_8WAY) && !defined(HMQ1725_4WAY)
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
@@ -7,10 +10,7 @@
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/luffa/sph_luffa.h"
|
||||
#include "algo/cubehash/sph_cubehash.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/simd/sph_simd.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/fugue/sph_fugue.h"
|
||||
@@ -21,6 +21,9 @@
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#endif
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
@@ -392,3 +395,4 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
@@ -72,12 +72,10 @@ void quark_8way_hash( void *state, const void *input )
|
||||
|
||||
memcpy( &ctx, &quark_8way_ctx, sizeof(quark_8way_ctx) );
|
||||
|
||||
blake512_8way_update( &ctx.blake, input, 80 );
|
||||
blake512_8way_close( &ctx.blake, vhash );
|
||||
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||
zero );
|
||||
|
||||
@@ -86,70 +84,34 @@ void quark_8way_hash( void *state, const void *input )
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
if ( ( vh_mask & 0x0f ) != 0x0f )
|
||||
{
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
|
||||
}
|
||||
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
||||
{
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
|
||||
}
|
||||
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
|
||||
if ( ( vh_mask & 0x0f ) != 0x0f )
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
|
||||
|
||||
#else
|
||||
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, 512 );
|
||||
|
||||
if ( hash0[0] & mask )
|
||||
{
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
}
|
||||
if ( hash1[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(char*)hash1, 512 );
|
||||
}
|
||||
if ( hash2[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(char*)hash2, 512 );
|
||||
}
|
||||
if ( hash3[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
}
|
||||
if ( hash4[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4,
|
||||
(char*)hash4, 512 );
|
||||
}
|
||||
if ( hash5[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5,
|
||||
(char*)hash5, 512 );
|
||||
}
|
||||
if ( hash6[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6,
|
||||
(char*)hash6, 512 );
|
||||
}
|
||||
if ( hash7[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7,
|
||||
(char*)hash7, 512 );
|
||||
}
|
||||
if ( hash0[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
if ( hash1[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
if ( hash2[0] & 8)
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
if ( hash3[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
if ( hash4[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
if ( hash5[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
if ( hash6[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
if ( hash7[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64( vhashC, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||
hash7, 512 );
|
||||
@@ -157,10 +119,7 @@ void quark_8way_hash( void *state, const void *input )
|
||||
#endif
|
||||
|
||||
if ( vh_mask & 0xff )
|
||||
{
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhashB );
|
||||
}
|
||||
skein512_8way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhC, vhB, vh_mask );
|
||||
|
||||
@@ -168,10 +127,10 @@ void quark_8way_hash( void *state, const void *input )
|
||||
|
||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
|
||||
groestl512_4way_init( &ctx.groestl, 64 );
|
||||
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
|
||||
if ( ( vh_mask & 0x0f ) != 0x0f )
|
||||
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
||||
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||
|
||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||
|
||||
@@ -180,22 +139,22 @@ void quark_8way_hash( void *state, const void *input )
|
||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
vhash, 512 );
|
||||
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
if ( hash0[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
if ( hash1[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
if ( hash2[0] & 8)
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
if ( hash3[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
if ( hash4[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||
if ( hash5[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||
if ( hash6[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||
if ( hash7[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||
|
||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||
512 );
|
||||
@@ -209,27 +168,16 @@ void quark_8way_hash( void *state, const void *input )
|
||||
zero );
|
||||
|
||||
if ( ( vh_mask & 0xff ) != 0xff )
|
||||
{
|
||||
blake512_8way_init( &ctx.blake );
|
||||
blake512_8way_update( &ctx.blake, vhash, 64 );
|
||||
blake512_8way_close( &ctx.blake, vhashA );
|
||||
}
|
||||
|
||||
blake512_8way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
if ( vh_mask & 0xff )
|
||||
{
|
||||
bmw512_8way_init( &ctx.bmw );
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhashB );
|
||||
}
|
||||
bmw512_8way_full( &ctx.bmw, vhashB, vhash, 64 );
|
||||
|
||||
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
skein512_8way_init( &ctx.skein );
|
||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_8way_close( &ctx.skein, vhash );
|
||||
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||
zero );
|
||||
@@ -258,41 +206,44 @@ void quark_8way_hash( void *state, const void *input )
|
||||
int scanhash_quark_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash64[4*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[49]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint64_t *hash64_q3 = &(hash64[3*8]);
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
||||
quark_8way_hash( hash64, vdata );
|
||||
|
||||
quark_8way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( unlikely( hash7[ i<<1 ] <= Htarg ) )
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
|
||||
{
|
||||
extr_lane_8x64( lane_hash, hash, i, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
extr_lane_8x64( lane_hash, hash64, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( ( n < max_nonce-8 ) && !work_restart[thr_id].restart );
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
@@ -333,67 +284,47 @@ void quark_4way_hash( void *state, const void *input )
|
||||
__m256i* vhA = (__m256i*)vhashA;
|
||||
__m256i* vhB = (__m256i*)vhashB;
|
||||
__m256i vh_mask;
|
||||
int h_mask;
|
||||
quark_4way_ctx_holder ctx;
|
||||
const __m256i bit3_mask = m256_const1_64( 8 );
|
||||
const uint32_t mask = 8;
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
|
||||
memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) );
|
||||
|
||||
blake512_4way_update( &ctx.blake, input, 80 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
if ( hash0[0] & mask )
|
||||
{
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
}
|
||||
if ( hash1[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
||||
(char*)hash1, 512 );
|
||||
}
|
||||
if ( hash2[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
||||
(char*)hash2, 512 );
|
||||
}
|
||||
if ( hash3[0] & mask )
|
||||
{
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
}
|
||||
// A
|
||||
if ( hash0[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
if ( hash1[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
if ( hash2[0] & 8)
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
if ( hash3[0] & 8 )
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhashB );
|
||||
}
|
||||
// B
|
||||
if ( likely( h_mask & 0xffffffff ) )
|
||||
skein512_4way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
@@ -401,15 +332,13 @@ void quark_4way_hash( void *state, const void *input )
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way_update( &ctx.blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx.blake, vhashA );
|
||||
}
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
// A
|
||||
if ( likely( ( h_mask & 0xffffffff ) != 0xffffffff ) )
|
||||
blake512_4way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||
// B
|
||||
if ( likely( h_mask & 0xffffffff ) )
|
||||
{
|
||||
bmw512_4way_init( &ctx.bmw );
|
||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||
@@ -421,20 +350,20 @@ void quark_4way_hash( void *state, const void *input )
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhash );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
// A
|
||||
if ( likely( ( h_mask & 0xffffffff ) != 0xffffffff ) )
|
||||
{
|
||||
keccak512_4way_init( &ctx.keccak );
|
||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||
}
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
// B
|
||||
if ( likely( h_mask & 0xffffffff ) )
|
||||
{
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||
@@ -451,41 +380,44 @@ void quark_4way_hash( void *state, const void *input )
|
||||
int scanhash_quark_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint64_t hash64[4*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[25]);
|
||||
uint64_t *hash64_q3 = &(hash64[3*4]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
quark_4way_hash( hash64, vdata );
|
||||
|
||||
quark_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( unlikely( hash7[ i<<1 ] <= Htarg ) )
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash64_q3[ lane ] <= targ64_q3 && !bench )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, i, 256 );
|
||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||
extr_lane_4x64( lane_hash, hash64, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -1,5 +1,8 @@
|
||||
#include "cpuminer-config.h"
|
||||
#include "quark-gate.h"
|
||||
|
||||
#if !defined(QUARK_8WAY) && !defined(QUARK_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -137,4 +140,4 @@ int scanhash_quark( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -106,13 +106,13 @@ int scanhash_deep_2way( struct work *work,uint32_t max_nonce,
|
||||
if ( fulltest( hash, ptarget) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_lane_solution( work, hash, mythr, 0 );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
if ( !( (hash+8)[7] & mask ) )
|
||||
if ( fulltest( hash+8, ptarget) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+1;
|
||||
submit_lane_solution( work, hash+8, mythr, 1 );
|
||||
submit_solution( work, hash+8, mythr );
|
||||
}
|
||||
n += 2;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
|
@@ -1,4 +1,7 @@
|
||||
#include "deep-gate.h"
|
||||
|
||||
#if !defined(DEEP_8WAY) && !defined(DEEP_4WAY)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -114,4 +117,4 @@ int scanhash_deep( struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -153,7 +153,7 @@ int scanhash_qubit_4way( struct work *work,uint32_t max_nonce,
|
||||
if ( likely( fulltest( hash+(lane<<3), ptarget) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
||||
submit_solution( work, hash+(lane<<3), mythr );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
|
||||
@@ -255,13 +255,13 @@ int scanhash_qubit_2way( struct work *work,uint32_t max_nonce,
|
||||
if ( likely( fulltest( hash, ptarget) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_lane_solution( work, hash, mythr, 0 );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
if ( unlikely( ( (hash+8))[7] <= Htarg ) )
|
||||
if ( likely( fulltest( hash+8, ptarget) && !opt_benchmark ) )
|
||||
{
|
||||
pdata[19] = n+1;
|
||||
submit_lane_solution( work, hash+8, mythr, 1 );
|
||||
submit_solution( work, hash+8, mythr );
|
||||
}
|
||||
n += 2;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user