mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
26 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
51a1d91abd | ||
![]() |
13563e2598 | ||
![]() |
9571f85d53 | ||
![]() |
0e69756634 | ||
![]() |
9653bca1e2 | ||
![]() |
1c0719e8a4 | ||
![]() |
8b4b4dc613 | ||
![]() |
e76feaced8 | ||
![]() |
5e088d00d0 | ||
![]() |
972d4d70db | ||
![]() |
e96a6bd699 | ||
![]() |
fb9163185a | ||
![]() |
6e8b8ed34f | ||
![]() |
c0aadbcc99 | ||
![]() |
3da149418a | ||
![]() |
720610cce5 | ||
![]() |
cedcf4d070 | ||
![]() |
81b50c3c71 | ||
![]() |
0e1e88f53e | ||
![]() |
45c77a5c81 | ||
![]() |
dbce7e0721 | ||
![]() |
6d66051de6 | ||
![]() |
b93be8816a | ||
![]() |
19b0ac6d5c | ||
![]() |
3da2b958cf | ||
![]() |
dc2f8d81d3 |
@@ -163,6 +163,7 @@ cpuminer_SOURCES = \
|
|||||||
algo/sha/sha256-hash-4way.c \
|
algo/sha/sha256-hash-4way.c \
|
||||||
algo/sha/sha512-hash-4way.c \
|
algo/sha/sha512-hash-4way.c \
|
||||||
algo/sha/hmac-sha256-hash.c \
|
algo/sha/hmac-sha256-hash.c \
|
||||||
|
algo/sha/hmac-sha256-hash-4way.c \
|
||||||
algo/sha/sha2.c \
|
algo/sha/sha2.c \
|
||||||
algo/sha/sha256t-gate.c \
|
algo/sha/sha256t-gate.c \
|
||||||
algo/sha/sha256t-4way.c \
|
algo/sha/sha256t-4way.c \
|
||||||
@@ -256,6 +257,7 @@ cpuminer_SOURCES = \
|
|||||||
algo/x16/hex.c \
|
algo/x16/hex.c \
|
||||||
algo/x16/x21s-4way.c \
|
algo/x16/x21s-4way.c \
|
||||||
algo/x16/x21s.c \
|
algo/x16/x21s.c \
|
||||||
|
algo/x16/minotaur.c \
|
||||||
algo/x17/x17-gate.c \
|
algo/x17/x17-gate.c \
|
||||||
algo/x17/x17.c \
|
algo/x17/x17.c \
|
||||||
algo/x17/x17-4way.c \
|
algo/x17/x17-4way.c \
|
||||||
|
56
README.md
56
README.md
@@ -12,10 +12,24 @@ a false positive, they are flagged simply because they are cryptocurrency
|
|||||||
miners. The source code is open for anyone to inspect. If you don't trust
|
miners. The source code is open for anyone to inspect. If you don't trust
|
||||||
the software, don't use it.
|
the software, don't use it.
|
||||||
|
|
||||||
|
|
||||||
|
New thread:
|
||||||
|
|
||||||
|
https://bitcointalk.org/index.php?topic=5226770.msg53865575#msg53865575
|
||||||
|
|
||||||
|
Old thread:
|
||||||
|
|
||||||
https://bitcointalk.org/index.php?topic=1326803.0
|
https://bitcointalk.org/index.php?topic=1326803.0
|
||||||
|
|
||||||
mailto://jayddee246@gmail.com
|
mailto://jayddee246@gmail.com
|
||||||
|
|
||||||
|
This note is to confirm that bitcointalk users JayDDee and joblo are the
|
||||||
|
same person.
|
||||||
|
|
||||||
|
I created a new BCT user JayDDee to match my github user id.
|
||||||
|
The old thread has been locked but still contains useful information for
|
||||||
|
reading.
|
||||||
|
|
||||||
See file RELEASE_NOTES for change log and INSTALL_LINUX or INSTALL_WINDOWS
|
See file RELEASE_NOTES for change log and INSTALL_LINUX or INSTALL_WINDOWS
|
||||||
for compile instructions.
|
for compile instructions.
|
||||||
|
|
||||||
@@ -23,25 +37,25 @@ Requirements
|
|||||||
------------
|
------------
|
||||||
|
|
||||||
1. A x86_64 architecture CPU with a minimum of SSE2 support. This includes
|
1. A x86_64 architecture CPU with a minimum of SSE2 support. This includes
|
||||||
Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
|
Intel Core2 and newer and AMD equivalents. Further optimizations are available
|
||||||
optimizations a CPU with AES_NI is required. This includes Intel Westmere
|
on some algorithms for CPUs with AES, AVX, AVX2, SHA, AVX512 and VAES.
|
||||||
and newer and AMD equivalents. Further optimizations are available on some
|
|
||||||
algoritms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
|
|
||||||
|
|
||||||
Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
|
Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
|
||||||
performance.
|
performance.
|
||||||
|
|
||||||
ARM CPUs are not supported.
|
ARM and Aarch64 CPUs are not supported.
|
||||||
|
|
||||||
2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
|
2. 64 bit Linux or Windows OS. Ubuntu and Fedora based distributions,
|
||||||
Centos, are known to work and have all dependencies in their repositories.
|
including Mint and Centos, are known to work and have all dependencies
|
||||||
Others may work but may require more effort. Older versions such as Centos 6
|
in their repositories. Others may work but may require more effort. Older
|
||||||
don't work due to missing features.
|
versions such as Centos 6 don't work due to missing features.
|
||||||
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
|
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
|
||||||
|
|
||||||
MacOS, OSx and Android are not supported.
|
MacOS, OSx and Android are not supported.
|
||||||
|
|
||||||
3. Stratum pool. Some algos may work wallet mining using getwork or GBT. YMMV.
|
3. Stratum pool supporting stratum+tcp:// or stratum+ssl:// protocols or
|
||||||
|
RPC getwork using http:// or https://.
|
||||||
|
GBT is YMMV.
|
||||||
|
|
||||||
Supported Algorithms
|
Supported Algorithms
|
||||||
--------------------
|
--------------------
|
||||||
@@ -79,6 +93,7 @@ Supported Algorithms
|
|||||||
lyra2z
|
lyra2z
|
||||||
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
|
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
|
||||||
m7m Magi (XMG)
|
m7m Magi (XMG)
|
||||||
|
minotaur Ringcoin (RNG)
|
||||||
myr-gr Myriad-Groestl
|
myr-gr Myriad-Groestl
|
||||||
neoscrypt NeoScrypt(128, 2, 1)
|
neoscrypt NeoScrypt(128, 2, 1)
|
||||||
nist5 Nist5
|
nist5 Nist5
|
||||||
@@ -138,6 +153,27 @@ Supported Algorithms
|
|||||||
yespower-b2b generic yespower + blake2b
|
yespower-b2b generic yespower + blake2b
|
||||||
zr5 Ziftr
|
zr5 Ziftr
|
||||||
|
|
||||||
|
Many variations of scrypt based algos can be mined by specifying their
|
||||||
|
parameters:
|
||||||
|
|
||||||
|
scryptn2: --algo scrypt --param-n 1048576
|
||||||
|
|
||||||
|
cpupower: --algo yespower --param-key "CPUpower: The number of CPU working or available for proof-of-work mining"
|
||||||
|
|
||||||
|
power2b: --algo yespower-b2b --param-n 2048 --param-r 32 --param-key "Now I am become Death, the destroyer of worlds"
|
||||||
|
|
||||||
|
sugarchain: --algo yespower --param-n 2048 --param-r 32 --param-key "Satoshi Nakamoto 31/Oct/2008 Proof-of-work is essentially one-CPU-one-vote"
|
||||||
|
|
||||||
|
yespoweriots: --algo yespower --param-n 2048 --param-key "Iots is committed to the development of IOT"
|
||||||
|
|
||||||
|
yespowerlitb: --algo yespower --param-n 2048 --param-r 32 --param-key "LITBpower: The number of LITB working or available for proof-of-work mini"
|
||||||
|
|
||||||
|
yespoweric: --algo yespower --param-n 2048 --param-r 32 --param-key "IsotopeC"
|
||||||
|
|
||||||
|
yespowerurx: --algo yespower --param-n 2048 --param-r 32 --param-key "UraniumX"
|
||||||
|
|
||||||
|
yespowerltncg: --algo yespower --param-n 2048 --param-r 32 --param-key "LTNCGYES"
|
||||||
|
|
||||||
Errata
|
Errata
|
||||||
------
|
------
|
||||||
|
|
||||||
|
31
README.txt
31
README.txt
@@ -1,8 +1,8 @@
|
|||||||
This file is included in the Windows binary package. Compile instructions
|
This file is included in the Windows binary package. Compile instructions
|
||||||
for Linux and Windows can be found in RELEASE_NOTES.
|
for Linux and Windows can be found in RELEASE_NOTES.
|
||||||
|
|
||||||
cpuminer is a console program that is executed from a DOS command prompt.
|
cpuminer is a console program that is executed from a DOS or Powershell
|
||||||
There is no GUI and no mouse support.
|
prompt. There is no GUI and no mouse support.
|
||||||
|
|
||||||
Miner programs are often flagged as malware by antivirus programs. This is
|
Miner programs are often flagged as malware by antivirus programs. This is
|
||||||
a false positive, they are flagged simply because they are cryptocurrency
|
a false positive, they are flagged simply because they are cryptocurrency
|
||||||
@@ -15,8 +15,8 @@ the features listed at cpuminer startup to ensure you are mining at
|
|||||||
optimum speed using the best available features.
|
optimum speed using the best available features.
|
||||||
|
|
||||||
Architecture names and compile options used are only provided for Intel
|
Architecture names and compile options used are only provided for Intel
|
||||||
Core series. Budget CPUs like Pentium and Celeron are often missing the
|
Core series. Budget CPUs like Pentium and Celeron are often missing some
|
||||||
latest features.
|
features.
|
||||||
|
|
||||||
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
||||||
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
||||||
@@ -31,14 +31,29 @@ https://en.wikipedia.org/wiki/List_of_Intel_CPU_microarchitectures
|
|||||||
https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures
|
https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures
|
||||||
|
|
||||||
|
|
||||||
Exe name Compile flags Arch name
|
Exe file name Compile flags Arch name
|
||||||
|
|
||||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
||||||
cpuminer-aes-sse42.exe "-march=westmere" Westmere
|
cpuminer-aes-sse42.exe "-march=westmere" Westmere
|
||||||
cpuminer-avx.exe "-march=corei7-avx" Sandybridge
|
cpuminer-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
|
||||||
cpuminer-avx2.exe "-march=core-avx2 -maes" Haswell, Skylake, Coffeelake
|
cpuminer-avx2.exe "-march=core-avx2 -maes" Haswell*
|
||||||
cpuminer-avx512.exe "-march=skylake-avx512" Skylake-X, Cascadelake-X
|
cpuminer-avx512.exe "-march=skylake-avx512" Skylake-X, Cascadelake-X
|
||||||
cpuminer-zen "-march=znver1" AMD Ryzen, Threadripper
|
cpuminer-zen.exe "-march=znver1" AMD Ryzen, Threadripper
|
||||||
|
cpuminer-avx512-sha-vaes.exe "-march=icelake-client" Icelake*
|
||||||
|
|
||||||
|
* Haswell includes Broadwell, Skylake, Kabylake, Coffeelake & Cometlake.
|
||||||
|
Icelake is only available on some laptops. Mining with a laptop is not
|
||||||
|
recommended. The icelake build is included in anticipation of Intel eventually
|
||||||
|
releasing a desktop CPU with a microarchitecture newer than Skylake.
|
||||||
|
|
||||||
|
Notes about included DLL files:
|
||||||
|
|
||||||
|
Downloading DLL files from alternative sources presents an inherent
|
||||||
|
security risk if their source is unknown. All DLL files included have
|
||||||
|
been copied from the Ubuntu-20.04 installation or compiled by me from
|
||||||
|
source code obtained from the author's official repository. The exact
|
||||||
|
procedure is documented in the build instructions for Windows:
|
||||||
|
https://github.com/JayDDee/cpuminer-opt/wiki/Compiling-from-source
|
||||||
|
|
||||||
If you like this software feel free to donate:
|
If you like this software feel free to donate:
|
||||||
|
|
||||||
|
202
RELEASE_NOTES
202
RELEASE_NOTES
@@ -65,6 +65,208 @@ If not what makes it happen or not happen?
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
v3.14.2
|
||||||
|
|
||||||
|
The second line of the Share Accepted log is no longer displayed,
|
||||||
|
new Xnonce log is added and other small log tweaks.
|
||||||
|
|
||||||
|
#265: Cleanup use of mutex.
|
||||||
|
|
||||||
|
v3.14.1
|
||||||
|
|
||||||
|
GBT and getwork log changes:
|
||||||
|
fixed missing TTF in New Block log,
|
||||||
|
ntime no longer byte-swapped for display in New Work log,
|
||||||
|
fixed zero effective hash rate in Periodic Report log,
|
||||||
|
deleted "Current block is..." log.
|
||||||
|
|
||||||
|
Renamed stratum "New Job" log to "New Work" to be consistent with the solo
|
||||||
|
version of the log. Added more data to both versions.
|
||||||
|
|
||||||
|
v3.14.0
|
||||||
|
|
||||||
|
Changes to solo mining:
|
||||||
|
- segwit is supported by getblocktemplate,
|
||||||
|
- longpolling is not working and is disabled,
|
||||||
|
- Periodic Report log is output,
|
||||||
|
- New Block log includes TTF estimates,
|
||||||
|
- Stratum thread no longer created when using getwork or GBT.
|
||||||
|
|
||||||
|
Fixed BUG log mining sha256d.
|
||||||
|
|
||||||
|
v3.13.1.1
|
||||||
|
|
||||||
|
Fixed Windows crash mining minotaur algo.
|
||||||
|
|
||||||
|
Fixed GCC 10 compile again.
|
||||||
|
Added -fno-common to testing to be consistent with GCC 10 default.
|
||||||
|
|
||||||
|
v3.13.1
|
||||||
|
|
||||||
|
Added minotaur algo for Ringcoin.
|
||||||
|
|
||||||
|
v3.13.0.1
|
||||||
|
|
||||||
|
Issue #262: Fixed xevan AVX2 invalid shares.
|
||||||
|
|
||||||
|
v3.13.0
|
||||||
|
|
||||||
|
Updated Windows binaries compiled with GCC 9. Included DLLs also updated.
|
||||||
|
Icelake build (cpuminer-avx512-sha-vaes.exe) now included in Windows
|
||||||
|
binaries package.
|
||||||
|
|
||||||
|
No source code changes.
|
||||||
|
|
||||||
|
v3.12.8.2
|
||||||
|
|
||||||
|
Fixed x12 AVX2 rejects.
|
||||||
|
Fixed phi AVX2 crash.
|
||||||
|
|
||||||
|
v3.12.8.1
|
||||||
|
|
||||||
|
Issue #261: Fixed yescryptr8g invalid shares.
|
||||||
|
|
||||||
|
v3.12.8
|
||||||
|
|
||||||
|
Yespower sha256 prehash made thread safe.
|
||||||
|
|
||||||
|
Rewrote diff conversion functions from scratch to be simpler and use
|
||||||
|
long double (float80) and int128 arithmetic for improved accuracy and
|
||||||
|
precision.
|
||||||
|
|
||||||
|
Some code cleanup and assorted small changes.
|
||||||
|
|
||||||
|
v3.12.7
|
||||||
|
|
||||||
|
Issue #257: fixed a file descriptor leak which caused the CPU temperature
|
||||||
|
and frequency query to report zeros after mining for a couple of hours.
|
||||||
|
|
||||||
|
Issue #253: stale share reduction for yescrypt, sonoa.
|
||||||
|
|
||||||
|
v3.12.6.1
|
||||||
|
|
||||||
|
Issue #252: Fixed SSL mining (stratum+tcps://)
|
||||||
|
|
||||||
|
Issue #254: Fixed benchmark.
|
||||||
|
|
||||||
|
Issue #253: Implemented stale share reduction for yespower, x25x, x22i, x21s,
|
||||||
|
x16*, scryptn2, more to come.
|
||||||
|
|
||||||
|
v3.12.6
|
||||||
|
|
||||||
|
Issue #246: improved stale share detection for getwork.
|
||||||
|
|
||||||
|
Improved precision of target_to_diff conversion from 4 digits to 20+.
|
||||||
|
|
||||||
|
Display hash and target debug data for all rejected shares.
|
||||||
|
|
||||||
|
A graphical representation of CPU affinity is displayed when using --threads.
|
||||||
|
|
||||||
|
Added highest and lowest accepted share to summary log.
|
||||||
|
|
||||||
|
Other small changes to logs to improve consistency and clarity.
|
||||||
|
|
||||||
|
v3.12.5
|
||||||
|
|
||||||
|
Issues #246 & #251: fixed incorrect share diff for stratum and getwork,
|
||||||
|
fixed incorrect target diff for getwork. Stats should now be correct for
|
||||||
|
getwork as well as stratum.
|
||||||
|
|
||||||
|
Issue #252: Fixed stratum+tcps not using curl ssl.
|
||||||
|
|
||||||
|
Getwork: reduce stale blocks, faster response to new work.
|
||||||
|
|
||||||
|
Added ntime to new job/work logs.
|
||||||
|
|
||||||
|
README.md now lists the parameters for yespower variations that don't have
|
||||||
|
a specific algo name.
|
||||||
|
|
||||||
|
v3.12.4.6
|
||||||
|
|
||||||
|
Issue #246: fixed getwork repeated new block logs with same height. New work
|
||||||
|
for the same block is now reported as "New work" instead of "New block".
|
||||||
|
Also added a check that work is new before generating "New work" log.
|
||||||
|
|
||||||
|
Added target diff to getwork new block log.
|
||||||
|
|
||||||
|
Changed share ratio in share result log to simple fraction, no longer %.
|
||||||
|
|
||||||
|
Added debug log to display mininginfo, use -D.
|
||||||
|
|
||||||
|
v3.12.4.5
|
||||||
|
|
||||||
|
Issue #246: better stale share detection for getwork, and enhanced logging
|
||||||
|
of stale shares for stratum & getwork.
|
||||||
|
|
||||||
|
Issue #251: fixed incorrect share difficulty and share ratio in share
|
||||||
|
result log.
|
||||||
|
|
||||||
|
Changed submit log to include share diff and block height.
|
||||||
|
|
||||||
|
Small cosmetic changes to logs.
|
||||||
|
|
||||||
|
v3.12.4.4
|
||||||
|
|
||||||
|
Issue #246: Fixed net hashrate in getwork block log,
|
||||||
|
removed duplicate getwork block log,
|
||||||
|
other small tweaks to stats logs for getwork.
|
||||||
|
|
||||||
|
Issue #248: Fixed chronic stale shares with scrypt:1048576 (scryptn2).
|
||||||
|
|
||||||
|
v3.12.4.3
|
||||||
|
|
||||||
|
Fixed segfault in new block log for getwork.
|
||||||
|
|
||||||
|
Disabled silent discarding of stale work after the submit is logged.
|
||||||
|
|
||||||
|
v3.12.4.2
|
||||||
|
|
||||||
|
Issue #245: fixed getwork stale shares, solo mining with getwork now works.
|
||||||
|
|
||||||
|
Issue #246: implemented block and summary logs for getwork.
|
||||||
|
|
||||||
|
v3.12.4.1
|
||||||
|
|
||||||
|
Issue #245: fix scantime when mining solo with getwork.
|
||||||
|
|
||||||
|
Added debug logs for creation of stratum and longpoll threads, use -D to
|
||||||
|
enable.
|
||||||
|
|
||||||
|
v3.12.4
|
||||||
|
|
||||||
|
Issue #244: Change longpoll to ignore job id.
|
||||||
|
|
||||||
|
Lyra2rev2 AVX2 +3%, AVX512 +6%.
|
||||||
|
|
||||||
|
v3.12.3.1
|
||||||
|
|
||||||
|
Issue #241: Fixed regression that broke coinbase address in v3.11.7.
|
||||||
|
|
||||||
|
v3.12.3
|
||||||
|
|
||||||
|
Issue #238: Fixed skunk AVX2.
|
||||||
|
|
||||||
|
Issue #239: Faster AVX2 & AVX512 for skein +44%, skein2 +30%, plus marginal
|
||||||
|
increases for skunk, x16r, x16rv2, x16rt, x16rt-veil, x16s, x21s.
|
||||||
|
|
||||||
|
Faster anime VAES +57%, AVX512 +21%, AVX2 +3%.
|
||||||
|
|
||||||
|
Redesigned code responsible for #236.
|
||||||
|
|
||||||
|
v3.12.2
|
||||||
|
|
||||||
|
Fixed xevan, skein, skein2 AVX2, #238.
|
||||||
|
|
||||||
|
Reversed polarity of AVX2 vector bit test utilities, and all users, to be
|
||||||
|
logically and semantically correct. Follow up to issue #236.
|
||||||
|
|
||||||
|
v3.12.1
|
||||||
|
|
||||||
|
Fixed anime AVX2 low difficulty shares, git issue #236.
|
||||||
|
|
||||||
|
Periodic summary now reports lost hash rate due to rejected and stale shares,
|
||||||
|
displayed only when non-zero.
|
||||||
|
|
||||||
v3.12.0.1
|
v3.12.0.1
|
||||||
|
|
||||||
Fixed hodl rejects, git issue #237.
|
Fixed hodl rejects, git issue #237.
|
||||||
|
191
aclocal.m4
vendored
191
aclocal.m4
vendored
@@ -1,6 +1,6 @@
|
|||||||
# generated automatically by aclocal 1.15.1 -*- Autoconf -*-
|
# generated automatically by aclocal 1.16.1 -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||||
|
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to.
|
|||||||
If you have problems, you may need to regenerate the build system entirely.
|
If you have problems, you may need to regenerate the build system entirely.
|
||||||
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
|
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
|
||||||
|
|
||||||
# Copyright (C) 2002-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2002-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -32,10 +32,10 @@ To do so, use the procedure documented by the package, typically 'autoreconf'.])
|
|||||||
# generated from the m4 files accompanying Automake X.Y.
|
# generated from the m4 files accompanying Automake X.Y.
|
||||||
# (This private macro should not be called outside this file.)
|
# (This private macro should not be called outside this file.)
|
||||||
AC_DEFUN([AM_AUTOMAKE_VERSION],
|
AC_DEFUN([AM_AUTOMAKE_VERSION],
|
||||||
[am__api_version='1.15'
|
[am__api_version='1.16'
|
||||||
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
||||||
dnl require some minimum version. Point them to the right macro.
|
dnl require some minimum version. Point them to the right macro.
|
||||||
m4_if([$1], [1.15.1], [],
|
m4_if([$1], [1.16.1], [],
|
||||||
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
||||||
])
|
])
|
||||||
|
|
||||||
@@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
|
|||||||
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
|
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
|
||||||
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
|
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
|
||||||
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
||||||
[AM_AUTOMAKE_VERSION([1.15.1])dnl
|
[AM_AUTOMAKE_VERSION([1.16.1])dnl
|
||||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||||
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
|
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
|
||||||
|
|
||||||
# Figure out how to run the assembler. -*- Autoconf -*-
|
# Figure out how to run the assembler. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -78,7 +78,7 @@ _AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl
|
|||||||
|
|
||||||
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -130,7 +130,7 @@ am_aux_dir=`cd "$ac_aux_dir" && pwd`
|
|||||||
|
|
||||||
# AM_CONDITIONAL -*- Autoconf -*-
|
# AM_CONDITIONAL -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1997-2017 Free Software Foundation, Inc.
|
# Copyright (C) 1997-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -161,7 +161,7 @@ AC_CONFIG_COMMANDS_PRE(
|
|||||||
Usually this means the macro was only invoked conditionally.]])
|
Usually this means the macro was only invoked conditionally.]])
|
||||||
fi])])
|
fi])])
|
||||||
|
|
||||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
# Copyright (C) 1999-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -352,13 +352,12 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
|
|||||||
|
|
||||||
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
# Copyright (C) 1999-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
# with or without modifications, as long as this notice is preserved.
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
|
||||||
# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||||
# ------------------------------
|
# ------------------------------
|
||||||
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
|
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||||
@@ -366,49 +365,41 @@ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
|
|||||||
# Older Autoconf quotes --file arguments for eval, but not when files
|
# Older Autoconf quotes --file arguments for eval, but not when files
|
||||||
# are listed without --file. Let's play safe and only enable the eval
|
# are listed without --file. Let's play safe and only enable the eval
|
||||||
# if we detect the quoting.
|
# if we detect the quoting.
|
||||||
case $CONFIG_FILES in
|
# TODO: see whether this extra hack can be removed once we start
|
||||||
*\'*) eval set x "$CONFIG_FILES" ;;
|
# requiring Autoconf 2.70 or later.
|
||||||
*) set x $CONFIG_FILES ;;
|
AS_CASE([$CONFIG_FILES],
|
||||||
esac
|
[*\'*], [eval set x "$CONFIG_FILES"],
|
||||||
|
[*], [set x $CONFIG_FILES])
|
||||||
shift
|
shift
|
||||||
for mf
|
# Used to flag and report bootstrapping failures.
|
||||||
|
am_rc=0
|
||||||
|
for am_mf
|
||||||
do
|
do
|
||||||
# Strip MF so we end up with the name of the file.
|
# Strip MF so we end up with the name of the file.
|
||||||
mf=`echo "$mf" | sed -e 's/:.*$//'`
|
am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'`
|
||||||
# Check whether this is an Automake generated Makefile or not.
|
# Check whether this is an Automake generated Makefile which includes
|
||||||
# We used to match only the files named 'Makefile.in', but
|
# dependency-tracking related rules and includes.
|
||||||
# some people rename them; so instead we look at the file content.
|
# Grep'ing the whole file directly is not great: AIX grep has a line
|
||||||
# Grep'ing the first line is not enough: some people post-process
|
|
||||||
# each Makefile.in and add a new line on top of each file to say so.
|
|
||||||
# Grep'ing the whole file is not good either: AIX grep has a line
|
|
||||||
# limit of 2048, but all sed's we know have understand at least 4000.
|
# limit of 2048, but all sed's we know have understand at least 4000.
|
||||||
if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
|
sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
|
||||||
dirpart=`AS_DIRNAME("$mf")`
|
|| continue
|
||||||
else
|
am_dirpart=`AS_DIRNAME(["$am_mf"])`
|
||||||
continue
|
am_filepart=`AS_BASENAME(["$am_mf"])`
|
||||||
|
AM_RUN_LOG([cd "$am_dirpart" \
|
||||||
|
&& sed -e '/# am--include-marker/d' "$am_filepart" \
|
||||||
|
| $MAKE -f - am--depfiles]) || am_rc=$?
|
||||||
|
done
|
||||||
|
if test $am_rc -ne 0; then
|
||||||
|
AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments
|
||||||
|
for automatic dependency tracking. Try re-running configure with the
|
||||||
|
'--disable-dependency-tracking' option to at least be able to build
|
||||||
|
the package (albeit without support for automatic dependency tracking).])
|
||||||
fi
|
fi
|
||||||
# Extract the definition of DEPDIR, am__include, and am__quote
|
AS_UNSET([am_dirpart])
|
||||||
# from the Makefile without running 'make'.
|
AS_UNSET([am_filepart])
|
||||||
DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
|
AS_UNSET([am_mf])
|
||||||
test -z "$DEPDIR" && continue
|
AS_UNSET([am_rc])
|
||||||
am__include=`sed -n 's/^am__include = //p' < "$mf"`
|
rm -f conftest-deps.mk
|
||||||
test -z "$am__include" && continue
|
|
||||||
am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
|
|
||||||
# Find all dependency output files, they are included files with
|
|
||||||
# $(DEPDIR) in their names. We invoke sed twice because it is the
|
|
||||||
# simplest approach to changing $(DEPDIR) to its actual value in the
|
|
||||||
# expansion.
|
|
||||||
for file in `sed -n "
|
|
||||||
s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
|
|
||||||
sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do
|
|
||||||
# Make sure the directory exists.
|
|
||||||
test -f "$dirpart/$file" && continue
|
|
||||||
fdir=`AS_DIRNAME(["$file"])`
|
|
||||||
AS_MKDIR_P([$dirpart/$fdir])
|
|
||||||
# echo "creating $dirpart/$file"
|
|
||||||
echo '# dummy' > "$dirpart/$file"
|
|
||||||
done
|
|
||||||
done
|
|
||||||
}
|
}
|
||||||
])# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
])# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||||
|
|
||||||
@@ -417,18 +408,17 @@ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
|
|||||||
# -----------------------------
|
# -----------------------------
|
||||||
# This macro should only be invoked once -- use via AC_REQUIRE.
|
# This macro should only be invoked once -- use via AC_REQUIRE.
|
||||||
#
|
#
|
||||||
# This code is only required when automatic dependency tracking
|
# This code is only required when automatic dependency tracking is enabled.
|
||||||
# is enabled. FIXME. This creates each '.P' file that we will
|
# This creates each '.Po' and '.Plo' makefile fragment that we'll need in
|
||||||
# need in order to bootstrap the dependency handling code.
|
# order to bootstrap the dependency handling code.
|
||||||
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
|
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||||
[AC_CONFIG_COMMANDS([depfiles],
|
[AC_CONFIG_COMMANDS([depfiles],
|
||||||
[test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
|
[test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||||
[AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
|
[AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])])
|
||||||
])
|
|
||||||
|
|
||||||
# Do all the work for Automake. -*- Autoconf -*-
|
# Do all the work for Automake. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -515,8 +505,8 @@ AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
|
|||||||
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
|
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
|
||||||
# For better backward compatibility. To be removed once Automake 1.9.x
|
# For better backward compatibility. To be removed once Automake 1.9.x
|
||||||
# dies out for good. For more background, see:
|
# dies out for good. For more background, see:
|
||||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
|
# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
|
||||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
||||||
AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
|
AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
|
||||||
# We need awk for the "check" target (and possibly the TAP driver). The
|
# We need awk for the "check" target (and possibly the TAP driver). The
|
||||||
# system "awk" is bad on some platforms.
|
# system "awk" is bad on some platforms.
|
||||||
@@ -583,7 +573,7 @@ END
|
|||||||
Aborting the configuration process, to ensure you take notice of the issue.
|
Aborting the configuration process, to ensure you take notice of the issue.
|
||||||
|
|
||||||
You can download and install GNU coreutils to get an 'rm' implementation
|
You can download and install GNU coreutils to get an 'rm' implementation
|
||||||
that behaves properly: <http://www.gnu.org/software/coreutils/>.
|
that behaves properly: <https://www.gnu.org/software/coreutils/>.
|
||||||
|
|
||||||
If you want to complete the configuration process using your problematic
|
If you want to complete the configuration process using your problematic
|
||||||
'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
|
'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
|
||||||
@@ -625,7 +615,7 @@ for _am_header in $config_headers :; do
|
|||||||
done
|
done
|
||||||
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
||||||
|
|
||||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -646,7 +636,7 @@ if test x"${install_sh+set}" != xset; then
|
|||||||
fi
|
fi
|
||||||
AC_SUBST([install_sh])])
|
AC_SUBST([install_sh])])
|
||||||
|
|
||||||
# Copyright (C) 2003-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2003-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -668,7 +658,7 @@ AC_SUBST([am__leading_dot])])
|
|||||||
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
|
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
|
||||||
# From Jim Meyering
|
# From Jim Meyering
|
||||||
|
|
||||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -703,7 +693,7 @@ AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
|
|||||||
|
|
||||||
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -711,49 +701,42 @@ AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
|
|||||||
|
|
||||||
# AM_MAKE_INCLUDE()
|
# AM_MAKE_INCLUDE()
|
||||||
# -----------------
|
# -----------------
|
||||||
# Check to see how make treats includes.
|
# Check whether make has an 'include' directive that can support all
|
||||||
|
# the idioms we need for our automatic dependency tracking code.
|
||||||
AC_DEFUN([AM_MAKE_INCLUDE],
|
AC_DEFUN([AM_MAKE_INCLUDE],
|
||||||
[am_make=${MAKE-make}
|
[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive])
|
||||||
cat > confinc << 'END'
|
cat > confinc.mk << 'END'
|
||||||
am__doit:
|
am__doit:
|
||||||
@echo this is the am__doit target
|
@echo this is the am__doit target >confinc.out
|
||||||
.PHONY: am__doit
|
.PHONY: am__doit
|
||||||
END
|
END
|
||||||
# If we don't find an include directive, just comment out the code.
|
|
||||||
AC_MSG_CHECKING([for style of include used by $am_make])
|
|
||||||
am__include="#"
|
am__include="#"
|
||||||
am__quote=
|
am__quote=
|
||||||
_am_result=none
|
# BSD make does it like this.
|
||||||
# First try GNU make style include.
|
echo '.include "confinc.mk" # ignored' > confmf.BSD
|
||||||
echo "include confinc" > confmf
|
# Other make implementations (GNU, Solaris 10, AIX) do it like this.
|
||||||
# Ignore all kinds of additional output from 'make'.
|
echo 'include confinc.mk # ignored' > confmf.GNU
|
||||||
case `$am_make -s -f confmf 2> /dev/null` in #(
|
_am_result=no
|
||||||
*the\ am__doit\ target*)
|
for s in GNU BSD; do
|
||||||
am__include=include
|
AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out])
|
||||||
am__quote=
|
AS_CASE([$?:`cat confinc.out 2>/dev/null`],
|
||||||
_am_result=GNU
|
['0:this is the am__doit target'],
|
||||||
;;
|
[AS_CASE([$s],
|
||||||
esac
|
[BSD], [am__include='.include' am__quote='"'],
|
||||||
# Now try BSD make style include.
|
[am__include='include' am__quote=''])])
|
||||||
if test "$am__include" = "#"; then
|
if test "$am__include" != "#"; then
|
||||||
echo '.include "confinc"' > confmf
|
_am_result="yes ($s style)"
|
||||||
case `$am_make -s -f confmf 2> /dev/null` in #(
|
break
|
||||||
*the\ am__doit\ target*)
|
|
||||||
am__include=.include
|
|
||||||
am__quote="\""
|
|
||||||
_am_result=BSD
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
fi
|
fi
|
||||||
AC_SUBST([am__include])
|
done
|
||||||
AC_SUBST([am__quote])
|
rm -f confinc.* confmf.*
|
||||||
AC_MSG_RESULT([$_am_result])
|
AC_MSG_RESULT([${_am_result}])
|
||||||
rm -f confinc confmf
|
AC_SUBST([am__include])])
|
||||||
])
|
AC_SUBST([am__quote])])
|
||||||
|
|
||||||
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1997-2017 Free Software Foundation, Inc.
|
# Copyright (C) 1997-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -792,7 +775,7 @@ fi
|
|||||||
|
|
||||||
# Helper functions for option handling. -*- Autoconf -*-
|
# Helper functions for option handling. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -821,7 +804,7 @@ AC_DEFUN([_AM_SET_OPTIONS],
|
|||||||
AC_DEFUN([_AM_IF_OPTION],
|
AC_DEFUN([_AM_IF_OPTION],
|
||||||
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
|
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
|
||||||
|
|
||||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
# Copyright (C) 1999-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -868,7 +851,7 @@ AC_LANG_POP([C])])
|
|||||||
# For backward compatibility.
|
# For backward compatibility.
|
||||||
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
|
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
|
||||||
|
|
||||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -887,7 +870,7 @@ AC_DEFUN([AM_RUN_LOG],
|
|||||||
|
|
||||||
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -968,7 +951,7 @@ AC_CONFIG_COMMANDS_PRE(
|
|||||||
rm -f conftest.file
|
rm -f conftest.file
|
||||||
])
|
])
|
||||||
|
|
||||||
# Copyright (C) 2009-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2009-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -1028,7 +1011,7 @@ AC_SUBST([AM_BACKSLASH])dnl
|
|||||||
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
|
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
|
||||||
])
|
])
|
||||||
|
|
||||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -1056,7 +1039,7 @@ fi
|
|||||||
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
||||||
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
||||||
|
|
||||||
# Copyright (C) 2006-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2006-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
@@ -1075,7 +1058,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
|
|||||||
|
|
||||||
# Check how to create a tarball. -*- Autoconf -*-
|
# Check how to create a tarball. -*- Autoconf -*-
|
||||||
|
|
||||||
# Copyright (C) 2004-2017 Free Software Foundation, Inc.
|
# Copyright (C) 2004-2018 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This file is free software; the Free Software Foundation
|
# This file is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy and/or distribute it,
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
@@ -90,33 +90,59 @@ void algo_not_implemented()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// default null functions
|
// default null functions
|
||||||
|
// deprecated, use generic as default
|
||||||
int null_scanhash()
|
int null_scanhash()
|
||||||
{
|
{
|
||||||
applog(LOG_WARNING,"SWERR: undefined scanhash function in algo_gate");
|
applog(LOG_WARNING,"SWERR: undefined scanhash function in algo_gate");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void null_hash()
|
// Default generic scanhash can be used in many cases.
|
||||||
|
int scanhash_generic( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t edata[20] __attribute__((aligned(64)));
|
||||||
|
uint32_t hash[8] __attribute__((aligned(64)));
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 1;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
|
mm128_bswap32_80( edata, pdata );
|
||||||
|
do
|
||||||
|
{
|
||||||
|
edata[19] = n;
|
||||||
|
if ( likely( algo_gate.hash( hash, edata, thr_id ) ) )
|
||||||
|
if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n );
|
||||||
|
submit_solution( work, hash, mythr );
|
||||||
|
}
|
||||||
|
n++;
|
||||||
|
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
pdata[19] = n;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int null_hash()
|
||||||
{
|
{
|
||||||
applog(LOG_WARNING,"SWERR: null_hash unsafe null function");
|
applog(LOG_WARNING,"SWERR: null_hash unsafe null function");
|
||||||
};
|
return 0;
|
||||||
void null_hash_suw()
|
|
||||||
{
|
|
||||||
applog(LOG_WARNING,"SWERR: null_hash_suw unsafe null function");
|
|
||||||
};
|
};
|
||||||
|
|
||||||
void init_algo_gate( algo_gate_t* gate )
|
void init_algo_gate( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
gate->miner_thread_init = (void*)&return_true;
|
gate->miner_thread_init = (void*)&return_true;
|
||||||
gate->scanhash = (void*)&null_scanhash;
|
gate->scanhash = (void*)&scanhash_generic;
|
||||||
gate->hash = (void*)&null_hash;
|
gate->hash = (void*)&null_hash;
|
||||||
gate->hash_suw = (void*)&null_hash_suw;
|
|
||||||
gate->get_new_work = (void*)&std_get_new_work;
|
gate->get_new_work = (void*)&std_get_new_work;
|
||||||
gate->work_decode = (void*)&std_le_work_decode;
|
gate->work_decode = (void*)&std_le_work_decode;
|
||||||
gate->decode_extra_data = (void*)&do_nothing;
|
gate->decode_extra_data = (void*)&do_nothing;
|
||||||
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
|
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
|
||||||
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
|
|
||||||
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
|
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
|
||||||
gate->malloc_txs_request = (void*)&std_malloc_txs_request;
|
gate->malloc_txs_request = (void*)&std_malloc_txs_request;
|
||||||
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
|
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
|
||||||
@@ -184,6 +210,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
|||||||
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
|
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
|
||||||
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
||||||
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
||||||
|
case ALGO_MINOTAUR: register_minotaur_algo ( gate ); break;
|
||||||
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
||||||
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
|
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
|
||||||
case ALGO_NIST5: register_nist5_algo ( gate ); break;
|
case ALGO_NIST5: register_nist5_algo ( gate ); break;
|
||||||
@@ -230,11 +257,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
|||||||
case ALGO_X22I: register_x22i_algo ( gate ); break;
|
case ALGO_X22I: register_x22i_algo ( gate ); break;
|
||||||
case ALGO_X25X: register_x25x_algo ( gate ); break;
|
case ALGO_X25X: register_x25x_algo ( gate ); break;
|
||||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||||
/* case ALGO_YESCRYPT: register_yescrypt_05_algo ( gate ); break;
|
|
||||||
case ALGO_YESCRYPTR8: register_yescryptr8_05_algo ( gate ); break;
|
|
||||||
case ALGO_YESCRYPTR16: register_yescryptr16_05_algo ( gate ); break;
|
|
||||||
case ALGO_YESCRYPTR32: register_yescryptr32_05_algo ( gate ); break;
|
|
||||||
*/
|
|
||||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||||
case ALGO_YESCRYPTR8G: register_yescryptr8g_algo ( gate ); break;
|
case ALGO_YESCRYPTR8G: register_yescryptr8g_algo ( gate ); break;
|
||||||
@@ -261,7 +283,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
|||||||
// restore warnings
|
// restore warnings
|
||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
// run the alternate hash function for a specific algo
|
|
||||||
void exec_hash_function( int algo, void *output, const void *pdata )
|
void exec_hash_function( int algo, void *output, const void *pdata )
|
||||||
{
|
{
|
||||||
algo_gate_t gate;
|
algo_gate_t gate;
|
||||||
@@ -291,10 +312,7 @@ const char* const algo_alias_map[][2] =
|
|||||||
{ "blake256r8vnl", "vanilla" },
|
{ "blake256r8vnl", "vanilla" },
|
||||||
{ "blake256r14", "blake" },
|
{ "blake256r14", "blake" },
|
||||||
{ "blake256r14dcr", "decred" },
|
{ "blake256r14dcr", "decred" },
|
||||||
{ "cryptonote", "cryptonight" },
|
|
||||||
{ "cryptonight-light", "cryptolight" },
|
|
||||||
{ "diamond", "dmd-gr" },
|
{ "diamond", "dmd-gr" },
|
||||||
{ "droplp", "drop" },
|
|
||||||
{ "espers", "hmq1725" },
|
{ "espers", "hmq1725" },
|
||||||
{ "flax", "c11" },
|
{ "flax", "c11" },
|
||||||
{ "hsr", "x13sm3" },
|
{ "hsr", "x13sm3" },
|
||||||
@@ -307,6 +325,7 @@ const char* const algo_alias_map[][2] =
|
|||||||
{ "myriad", "myr-gr" },
|
{ "myriad", "myr-gr" },
|
||||||
{ "neo", "neoscrypt" },
|
{ "neo", "neoscrypt" },
|
||||||
{ "phi", "phi1612" },
|
{ "phi", "phi1612" },
|
||||||
|
{ "scryptn2", "scrypt:1048576" },
|
||||||
{ "sib", "x11gost" },
|
{ "sib", "x11gost" },
|
||||||
{ "timetravel8", "timetravel" },
|
{ "timetravel8", "timetravel" },
|
||||||
{ "veil", "x16rt-veil" },
|
{ "veil", "x16rt-veil" },
|
||||||
|
@@ -75,7 +75,7 @@
|
|||||||
|
|
||||||
// my hack at creating a set data type using bit masks. Set inclusion,
|
// my hack at creating a set data type using bit masks. Set inclusion,
|
||||||
// exclusion union and intersection operations are provided for convenience. In // some cases it may be desireable to use boolean algebra directly on the
|
// exclusion union and intersection operations are provided for convenience. In // some cases it may be desireable to use boolean algebra directly on the
|
||||||
// data to perfomr set operations. Sets can be represented as single
|
// data to perform set operations. Sets can be represented as single
|
||||||
// elements, a bitwise OR of multiple elements, a bitwise OR of multiple
|
// elements, a bitwise OR of multiple elements, a bitwise OR of multiple
|
||||||
// set variables or constants, or combinations of the above.
|
// set variables or constants, or combinations of the above.
|
||||||
// Examples:
|
// Examples:
|
||||||
@@ -110,12 +110,13 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
|
|||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
// mandatory functions, must be overwritten
|
// Mandatory functions, one of these is mandatory. If the default scanhash
|
||||||
|
// is used a custom hash function must be registered, with a custom scanhash
|
||||||
|
// the hash function is not necessary.
|
||||||
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
|
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
|
||||||
|
|
||||||
// optional unsafe, must be overwritten if algo uses function
|
//int ( *hash ) ( void*, const void*, uint32_t ) ;
|
||||||
void ( *hash ) ( void*, const void*, uint32_t ) ;
|
int ( *hash ) ( void*, const void*, int );
|
||||||
void ( *hash_suw ) ( void*, const void* );
|
|
||||||
|
|
||||||
//optional, safe to use default in most cases
|
//optional, safe to use default in most cases
|
||||||
|
|
||||||
@@ -123,14 +124,11 @@ void ( *hash_suw ) ( void*, const void* );
|
|||||||
// threads.
|
// threads.
|
||||||
bool ( *miner_thread_init ) ( int );
|
bool ( *miner_thread_init ) ( int );
|
||||||
|
|
||||||
// Generate global blockheader from stratum data.
|
|
||||||
void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* );
|
|
||||||
|
|
||||||
// Get thread local copy of blockheader with unique nonce.
|
// Get thread local copy of blockheader with unique nonce.
|
||||||
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t* );
|
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t* );
|
||||||
|
|
||||||
// Decode getwork blockheader
|
// Decode getwork blockheader
|
||||||
bool ( *work_decode ) ( const json_t*, struct work* );
|
bool ( *work_decode ) ( struct work* );
|
||||||
|
|
||||||
// Extra getwork data
|
// Extra getwork data
|
||||||
void ( *decode_extra_data ) ( struct work*, uint64_t* );
|
void ( *decode_extra_data ) ( struct work*, uint64_t* );
|
||||||
@@ -165,7 +163,9 @@ bool ( *do_this_thread ) ( int );
|
|||||||
// After do_this_thread
|
// After do_this_thread
|
||||||
void ( *resync_threads ) ( struct work* );
|
void ( *resync_threads ) ( struct work* );
|
||||||
|
|
||||||
|
// No longer needed
|
||||||
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
|
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
|
||||||
|
|
||||||
set_t optimizations;
|
set_t optimizations;
|
||||||
int ( *get_work_data_size ) ();
|
int ( *get_work_data_size ) ();
|
||||||
int ntime_index;
|
int ntime_index;
|
||||||
@@ -209,25 +209,26 @@ void four_way_not_tested();
|
|||||||
#define JR2_WORK_CMP_INDEX_2 43
|
#define JR2_WORK_CMP_INDEX_2 43
|
||||||
#define JR2_WORK_CMP_SIZE_2 33
|
#define JR2_WORK_CMP_SIZE_2 33
|
||||||
|
|
||||||
// allways returns failure
|
// deprecated, use generic instead
|
||||||
int null_scanhash();
|
int null_scanhash();
|
||||||
|
|
||||||
|
// Default generic, may be used in many cases.
|
||||||
|
int scanhash_generic( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
// displays warning
|
// displays warning
|
||||||
void null_hash ();
|
int null_hash ();
|
||||||
void null_hash_suw();
|
|
||||||
|
|
||||||
// optional safe targets, default listed first unless noted.
|
// optional safe targets, default listed first unless noted.
|
||||||
|
|
||||||
void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
|
void std_get_new_work( struct work *work, struct work *g_work, int thr_id,
|
||||||
uint32_t* end_nonce_ptr );
|
uint32_t* end_nonce_ptr );
|
||||||
|
|
||||||
void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *work );
|
|
||||||
|
|
||||||
void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
|
void sha256d_gen_merkle_root( char *merkle_root, struct stratum_ctx *sctx );
|
||||||
void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
|
void SHA256_gen_merkle_root ( char *merkle_root, struct stratum_ctx *sctx );
|
||||||
|
|
||||||
bool std_le_work_decode( const json_t *val, struct work *work );
|
bool std_le_work_decode( struct work *work );
|
||||||
bool std_be_work_decode( const json_t *val, struct work *work );
|
bool std_be_work_decode( struct work *work );
|
||||||
|
|
||||||
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
|
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
|
||||||
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
|
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
|
||||||
@@ -250,10 +251,6 @@ void std_build_block_header( struct work* g_work, uint32_t version,
|
|||||||
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
|
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
|
||||||
|
|
||||||
json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
|
json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
|
||||||
//json_t* jr2_longpoll_rpc_call( CURL *curl, int *err );
|
|
||||||
|
|
||||||
//bool std_stratum_handle_response( json_t *val );
|
|
||||||
//bool jr2_stratum_handle_response( json_t *val );
|
|
||||||
|
|
||||||
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||||
int thr_id );
|
int thr_id );
|
||||||
@@ -272,11 +269,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate );
|
|||||||
// compiler warnings but that's just more work for devs adding new algos.
|
// compiler warnings but that's just more work for devs adding new algos.
|
||||||
bool register_algo( algo_gate_t *gate );
|
bool register_algo( algo_gate_t *gate );
|
||||||
|
|
||||||
// Overrides a common set of functions used by RPC2 and other RPC2-specific
|
|
||||||
// init. Called by algo's register function before initializing algo-specific
|
|
||||||
// functions and data.
|
|
||||||
//bool register_json_rpc2( algo_gate_t *gate );
|
|
||||||
|
|
||||||
// use this to call the hash function of an algo directly, ie util.c test.
|
// use this to call the hash function of an algo directly, ie util.c test.
|
||||||
void exec_hash_function( int algo, void *output, const void *pdata );
|
void exec_hash_function( int algo, void *output, const void *pdata );
|
||||||
|
|
||||||
|
@@ -48,7 +48,7 @@ int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
|
||||||
@@ -107,7 +107,7 @@ int scanhash_blake_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( (hash+i)[7] <= HTarget && fulltest( hash+i, ptarget ) )
|
if ( (hash+i)[7] <= HTarget && fulltest( hash+i, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
|
|
||||||
|
@@ -45,7 +45,7 @@ int scanhash_blake2b_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
@@ -100,7 +100,7 @@ int scanhash_blake2b_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
@@ -49,7 +49,7 @@ int scanhash_blake2s_16way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 16;
|
n += 16;
|
||||||
@@ -104,7 +104,7 @@ int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
@@ -157,7 +157,7 @@ int scanhash_blake2s_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
@@ -49,7 +49,7 @@ int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
|
|||||||
&& !opt_benchmark )
|
&& !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
|
||||||
@@ -108,7 +108,7 @@ int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
|
|||||||
&& !opt_benchmark )
|
&& !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||||
|
@@ -62,7 +62,7 @@ int scanhash_decred_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[DECRED_NONCE_INDEX] = n+i;
|
pdata[DECRED_NONCE_INDEX] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||||
|
@@ -105,7 +105,7 @@ int scanhash_pentablake_4way( struct work *work,
|
|||||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + i;
|
pdata[19] = n + i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
|
||||||
|
@@ -138,7 +138,7 @@ void bmw512_2way_close( bmw512_2way_context *ctx, void *dst );
|
|||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
// BMW-512 4 way 64
|
// BMW-512 64 bit 4 way
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
__m256i buf[16];
|
__m256i buf[16];
|
||||||
@@ -149,7 +149,6 @@ typedef struct {
|
|||||||
|
|
||||||
typedef bmw_4way_big_context bmw512_4way_context;
|
typedef bmw_4way_big_context bmw512_4way_context;
|
||||||
|
|
||||||
|
|
||||||
void bmw512_4way_init(void *cc);
|
void bmw512_4way_init(void *cc);
|
||||||
|
|
||||||
void bmw512_4way_update(void *cc, const void *data, size_t len);
|
void bmw512_4way_update(void *cc, const void *data, size_t len);
|
||||||
@@ -164,6 +163,7 @@ void bmw512_4way_addbits_and_close(
|
|||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
|
||||||
|
// BMW-512 64 bit 8 way
|
||||||
typedef struct {
|
typedef struct {
|
||||||
__m512i buf[16];
|
__m512i buf[16];
|
||||||
__m512i H[16];
|
__m512i H[16];
|
||||||
@@ -171,6 +171,8 @@ typedef struct {
|
|||||||
uint64_t bit_count;
|
uint64_t bit_count;
|
||||||
} bmw512_8way_context __attribute__((aligned(128)));
|
} bmw512_8way_context __attribute__((aligned(128)));
|
||||||
|
|
||||||
|
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
|
||||||
|
size_t len );
|
||||||
void bmw512_8way_init( bmw512_8way_context *ctx );
|
void bmw512_8way_init( bmw512_8way_context *ctx );
|
||||||
void bmw512_8way_update( bmw512_8way_context *ctx, const void *data,
|
void bmw512_8way_update( bmw512_8way_context *ctx, const void *data,
|
||||||
size_t len );
|
size_t len );
|
||||||
|
@@ -46,7 +46,7 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
@@ -99,7 +99,7 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
@@ -1507,6 +1507,93 @@ void bmw512_8way_close( bmw512_8way_context *ctx, void *dst )
|
|||||||
casti_m512i( dst, u ) = h1[ v ];
|
casti_m512i( dst, u ) = h1[ v ];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// One-shot BMW-512 for 8 interleaved lanes: loads the initial chaining
// value, absorbs len bytes per lane from data, pads, finalizes and stores
// the upper 8 words of the result to out.
//
//   ctx  - working context; buf and H are overwritten.
//   out  - receives 8 __m512i vectors (one 64 bit word per lane each).
//   data - interleaved input, one __m512i per 8 bytes of lane data.
//   len  - bytes per lane. Copy counts are computed as len>>3, so len is
//          presumably a multiple of 8 -- TODO confirm against callers.
void bmw512_8way_full( bmw512_8way_context *ctx, void *out, const void *data,
                       size_t len )
{
   const int buf_size = 128;            // bytes of one lane per message block
   const uint64_t bit_count = len * 8;  // captured before len is consumed
   __m512i *src = (__m512i*)data;
   __m512i *buf = ctx->buf;
   __m512i spare[16];
   __m512i *H = ctx->H;                 // current chaining value
   __m512i *Hnext = spare;              // target of the next compression
   size_t ptr = 0;                      // bytes currently buffered per lane

   // Init: same constant broadcast to every lane.
   H[ 0] = m512_const1_64( 0x8081828384858687 );
   H[ 1] = m512_const1_64( 0x88898A8B8C8D8E8F );
   H[ 2] = m512_const1_64( 0x9091929394959697 );
   H[ 3] = m512_const1_64( 0x98999A9B9C9D9E9F );
   H[ 4] = m512_const1_64( 0xA0A1A2A3A4A5A6A7 );
   H[ 5] = m512_const1_64( 0xA8A9AAABACADAEAF );
   H[ 6] = m512_const1_64( 0xB0B1B2B3B4B5B6B7 );
   H[ 7] = m512_const1_64( 0xB8B9BABBBCBDBEBF );
   H[ 8] = m512_const1_64( 0xC0C1C2C3C4C5C6C7 );
   H[ 9] = m512_const1_64( 0xC8C9CACBCCCDCECF );
   H[10] = m512_const1_64( 0xD0D1D2D3D4D5D6D7 );
   H[11] = m512_const1_64( 0xD8D9DADBDCDDDEDF );
   H[12] = m512_const1_64( 0xE0E1E2E3E4E5E6E7 );
   H[13] = m512_const1_64( 0xE8E9EAEBECEDEEEF );
   H[14] = m512_const1_64( 0xF0F1F2F3F4F5F6F7 );
   H[15] = m512_const1_64( 0xF8F9FAFBFCFDFEFF );

   // Update: fill the block buffer and compress every time it is full.
   while ( len > 0 )
   {
      const size_t room = buf_size - ptr;
      const size_t clen = ( room > len ) ? len : room;

      memcpy_512( buf + (ptr>>3), src, clen >> 3 );
      src += clen >> 3;
      len -= clen;
      ptr += clen;

      if ( ptr == buf_size )
      {
         // Ping-pong between ctx->H and the local spare array.
         __m512i *swap = H;
         compress_big_8way( buf, H, Hnext );
         H = Hnext;
         Hnext = swap;
         ptr = 0;
      }
   }

   // Keep ctx->H in step with the chaining value when it ended up in spare.
   if ( H != ctx->H )
      memcpy_512( ctx->H, H, 16 );

   // Close: 0x80 marker, zero fill, bit length in the last 8 bytes.
   {
      __m512i ha[16], hb[16];

      buf[ ptr>>3 ] = m512_const1_64( 0x80 );
      ptr += 8;

      // No room left for the length field: flush an extra block first.
      if ( ptr > (buf_size - 8) )
      {
         memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
         compress_big_8way( buf, H, ha );
         ptr = 0;
         H = ha;
      }

      memset_zero_512( buf + (ptr>>3), (buf_size - 8 - ptr) >> 3 );
      buf[ (buf_size - 8) >> 3 ] = _mm512_set1_epi64( bit_count );
      compress_big_8way( buf, H, hb );

      // Final compression: the last state is the message, final_b8 the
      // chaining input.
      for ( int i = 0; i < 16; i++ )
         buf[ i ] = hb[ i ];
      compress_big_8way( buf, final_b8, ha );

      // The digest is words 8..15 of the final state.
      for ( int i = 0; i < 8; i++ )
         casti_m512i( out, i ) = ha[ i + 8 ];
   }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@@ -179,14 +179,6 @@ int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
|
|||||||
sp->rounds = 16;
|
sp->rounds = 16;
|
||||||
sp->pos = 0;
|
sp->pos = 0;
|
||||||
|
|
||||||
h[ 0] = m512_const1_128( iv[0] );
|
|
||||||
h[ 1] = m512_const1_128( iv[1] );
|
|
||||||
h[ 2] = m512_const1_128( iv[2] );
|
|
||||||
h[ 3] = m512_const1_128( iv[3] );
|
|
||||||
h[ 4] = m512_const1_128( iv[4] );
|
|
||||||
h[ 5] = m512_const1_128( iv[5] );
|
|
||||||
h[ 6] = m512_const1_128( iv[6] );
|
|
||||||
h[ 7] = m512_const1_128( iv[7] );
|
|
||||||
h[ 0] = m512_const1_128( iv[0] );
|
h[ 0] = m512_const1_128( iv[0] );
|
||||||
h[ 1] = m512_const1_128( iv[1] );
|
h[ 1] = m512_const1_128( iv[1] );
|
||||||
h[ 2] = m512_const1_128( iv[2] );
|
h[ 2] = m512_const1_128( iv[2] );
|
||||||
@@ -447,14 +439,6 @@ int cube_2way_full( cube_2way_context *sp, void *output, int hashbitlen,
|
|||||||
sp->rounds = 16;
|
sp->rounds = 16;
|
||||||
sp->pos = 0;
|
sp->pos = 0;
|
||||||
|
|
||||||
h[ 0] = m256_const1_128( iv[0] );
|
|
||||||
h[ 1] = m256_const1_128( iv[1] );
|
|
||||||
h[ 2] = m256_const1_128( iv[2] );
|
|
||||||
h[ 3] = m256_const1_128( iv[3] );
|
|
||||||
h[ 4] = m256_const1_128( iv[4] );
|
|
||||||
h[ 5] = m256_const1_128( iv[5] );
|
|
||||||
h[ 6] = m256_const1_128( iv[6] );
|
|
||||||
h[ 7] = m256_const1_128( iv[7] );
|
|
||||||
h[ 0] = m256_const1_128( iv[0] );
|
h[ 0] = m256_const1_128( iv[0] );
|
||||||
h[ 1] = m256_const1_128( iv[1] );
|
h[ 1] = m256_const1_128( iv[1] );
|
||||||
h[ 2] = m256_const1_128( iv[2] );
|
h[ 2] = m256_const1_128( iv[2] );
|
||||||
|
@@ -28,6 +28,27 @@ int cube_4way_update_close( cube_4way_context *sp, void *output,
|
|||||||
int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
|
int cube_4way_full( cube_4way_context *sp, void *output, int hashbitlen,
|
||||||
const void *data, size_t size );
|
const void *data, size_t size );
|
||||||
|
|
||||||
|
int cube_4x256_full( cube_4way_context *sp, void *output, int hashbitlen,
|
||||||
|
const void *data, size_t size );
|
||||||
|
|
||||||
|
// Fixed digest length wrappers around the generic 4 way CubeHash API.
//
// The init, update_close and close wrappers previously all expanded to
// cube_4way_update, which has neither the right arguments nor the right
// behaviour for those operations. Each one now forwards to its namesake.
//
// NOTE(review): the init wrappers assume
//    cube_4way_init( sp, hashbitlen, rounds, blockbytes )
// with rounds = 16 and blockbytes = 32, matching sp->rounds = 16 in
// cube_4way_full and cubehashInit( .., 256, 16, 32 ) used by the callers.
// The close wrappers assume a cube_4way_close( sp, output ) entry point.
// Confirm both against the prototypes at the top of this header.

#define cube512_4way_init( sp )   cube_4way_init( sp, 512, 16, 32 )
#define cube512_4way_update       cube_4way_update
#define cube512_4way_update_close cube_4way_update_close
#define cube512_4way_close        cube_4way_close
#define cube512_4way_full( sp, output, data, size ) \
   cube_4way_full( sp, output, 512, data, size )
#define cube512_4x256_full( sp, output, data, size ) \
   cube_4x256_full( sp, output, 512, data, size )

#define cube256_4way_init( sp )   cube_4way_init( sp, 256, 16, 32 )
#define cube256_4way_update       cube_4way_update
#define cube256_4way_update_close cube_4way_update_close
#define cube256_4way_close        cube_4way_close
#define cube256_4way_full( sp, output, data, size ) \
   cube_4way_full( sp, output, 256, data, size )
#define cube256_4x256_full( sp, output, data, size ) \
   cube_4x256_full( sp, output, 256, data, size )
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 2x128, 2 way parallel SSE2
|
// 2x128, 2 way parallel SSE2
|
||||||
|
@@ -22,18 +22,26 @@ typedef struct
|
|||||||
} echo_4way_context __attribute__ ((aligned (64)));
|
} echo_4way_context __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
int echo_4way_init( echo_4way_context *state, int hashbitlen );
|
int echo_4way_init( echo_4way_context *state, int hashbitlen );
|
||||||
|
#define echo512_4way_init( state ) echo_4way_init( state, 512 )
|
||||||
|
#define echo256_4way_init( state ) echo_4way_init( state, 256 )
|
||||||
|
|
||||||
int echo_4way_update( echo_4way_context *state, const void *data,
|
int echo_4way_update( echo_4way_context *state, const void *data,
|
||||||
unsigned int databitlen);
|
unsigned int databitlen);
|
||||||
|
#define echo512_4way_update echo_4way_update
|
||||||
|
|
||||||
int echo_close( echo_4way_context *state, void *hashval );
|
int echo_close( echo_4way_context *state, void *hashval );
|
||||||
|
#define echo512_4way_close echo_4way_close
|
||||||
|
|
||||||
int echo_4way_update_close( echo_4way_context *state, void *hashval,
|
int echo_4way_update_close( echo_4way_context *state, void *hashval,
|
||||||
const void *data, int databitlen );
|
const void *data, int databitlen );
|
||||||
|
#define echo512_4way_update_close echo_4way_update_close
|
||||||
|
|
||||||
int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
|
int echo_4way_full( echo_4way_context *ctx, void *hashval, int nHashSize,
|
||||||
const void *data, int datalen );
|
const void *data, int datalen );
|
||||||
|
#define echo512_4way_full( state, hashval, data, datalen ) \
|
||||||
|
echo_4way_full( state, hashval, 512, data, datalen )
|
||||||
|
#define echo256_4way_full( state, hashval, data, datalen ) \
|
||||||
|
echo_4way_full( state, hashval, 256, data, datalen )
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@@ -74,6 +74,14 @@ void sph_fugue512_close(void *cc, void *dst);
|
|||||||
void sph_fugue512_addbits_and_close(
|
void sph_fugue512_addbits_and_close(
|
||||||
void *cc, unsigned ub, unsigned n, void *dst);
|
void *cc, unsigned ub, unsigned n, void *dst);
|
||||||
|
|
||||||
|
// One-shot Fugue-512: runs init, update and close on cc in sequence.
// Hashes len bytes at data and writes the digest to dst.
// Wrapped in do/while(0) so it behaves as a single statement; each
// argument is evaluated exactly once per call that uses it, cc three times.
#define sph_fugue512_full( cc, dst, data, len ) \
do { \
   sph_fugue512_init( cc ); \
   sph_fugue512( cc, data, len ); \
   sph_fugue512_close( cc, dst ); \
} while ( 0 )
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@@ -53,7 +53,7 @@ int scanhash_groestl_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(lane<<3), ptarget) && !opt_benchmark )
|
if ( fulltest( hash+(lane<<3), ptarget) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
submit_solution( work, hash+(lane<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -143,7 +143,7 @@ int scanhash_myriad_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
@@ -226,7 +226,7 @@ int scanhash_myriad_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
@@ -99,9 +99,13 @@ void hodl_build_block_header( struct work* g_work, uint32_t version,
|
|||||||
// called only by thread 0, saves a backup of g_work
|
// called only by thread 0, saves a backup of g_work
|
||||||
void hodl_get_new_work( struct work* work, struct work* g_work)
|
void hodl_get_new_work( struct work* work, struct work* g_work)
|
||||||
{
|
{
|
||||||
|
pthread_mutex_lock( &g_work_lock );
|
||||||
|
|
||||||
work_free( &hodl_work );
|
work_free( &hodl_work );
|
||||||
work_copy( &hodl_work, g_work );
|
work_copy( &hodl_work, g_work );
|
||||||
hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999;
|
hodl_work.data[ algo_gate.nonce_index ] = ( clock() + rand() ) % 9999;
|
||||||
|
|
||||||
|
pthread_mutex_unlock( &g_work_lock );
|
||||||
}
|
}
|
||||||
|
|
||||||
json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
|
json_t *hodl_longpoll_rpc_call( CURL *curl, int *err, char* lp_url )
|
||||||
|
@@ -129,7 +129,7 @@ int scanhash_jha_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, lane_hash, mythr, i );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
@@ -45,7 +45,7 @@ int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( valid_hash( lane_hash, ptarget ) )
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n + lane );
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev,
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
@@ -97,7 +97,7 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( valid_hash( lane_hash, ptarget ))
|
if ( valid_hash( lane_hash, ptarget ))
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n + lane );
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev,
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
|
@@ -52,7 +52,7 @@ int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( valid_hash( lane_hash, ptarget ) )
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n + lane );
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev,
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
@@ -111,7 +111,7 @@ int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( valid_hash( lane_hash, ptarget ) )
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n + lane );
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev,
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
|
@@ -245,7 +245,7 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( unlikely( valid_hash( hash+(lane<<3), ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash+(lane<<3), ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n + lane );
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
submit_solution( work, hash+(lane<<3), mythr );
|
||||||
}
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||||
n += 16;
|
n += 16;
|
||||||
@@ -394,7 +394,7 @@ int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n + lane );
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
|
@@ -76,37 +76,34 @@ int scanhash_allium( struct work *work, uint32_t max_nonce,
|
|||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash[8];
|
uint32_t _ALIGN(128) hash[8];
|
||||||
uint32_t _ALIGN(128) endiandata[20];
|
uint32_t _ALIGN(128) edata[20];
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
const int thr_id = mythr->id;
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
ptarget[7] = 0x3ffff;
|
ptarget[7] = 0x3ffff;
|
||||||
|
|
||||||
for ( int i = 0; i < 19; i++ )
|
for ( int i = 0; i < 19; i++ )
|
||||||
be32enc( &endiandata[i], pdata[i] );
|
edata[i] = bswap_32( pdata[i] );
|
||||||
|
|
||||||
sph_blake256_init( &allium_ctx.blake );
|
sph_blake256_init( &allium_ctx.blake );
|
||||||
sph_blake256( &allium_ctx.blake, endiandata, 64 );
|
sph_blake256( &allium_ctx.blake, edata, 64 );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
be32enc( &endiandata[19], nonce );
|
edata[19] = nonce;
|
||||||
allium_hash( hash, endiandata );
|
allium_hash( hash, edata );
|
||||||
if ( hash[7] <= Htarg )
|
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
|
||||||
{
|
{
|
||||||
pdata[19] = nonce;
|
pdata[19] = bswap_32( nonce );
|
||||||
submit_solution( work, hash, mythr );
|
submit_solution( work, hash, mythr );
|
||||||
}
|
}
|
||||||
nonce++;
|
nonce++;
|
||||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||||
pdata[19] = nonce;
|
pdata[19] = nonce;
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -94,12 +94,12 @@ bool lyra2rev2_thread_init()
|
|||||||
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||||
|
|
||||||
int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
|
||||||
#if defined (LYRA2REV2_8WAY)
|
#if defined (LYRA2REV2_16WAY)
|
||||||
l2v2_wholeMatrix = _mm_malloc( 2 * size, 64 ); // 2 way
|
l2v2_wholeMatrix = _mm_malloc( 2 * size, 64 ); // 2 way
|
||||||
init_lyra2rev2_8way_ctx();;
|
init_lyra2rev2_16way_ctx();;
|
||||||
#elif defined (LYRA2REV2_4WAY)
|
#elif defined (LYRA2REV2_8WAY)
|
||||||
l2v2_wholeMatrix = _mm_malloc( size, 64 );
|
l2v2_wholeMatrix = _mm_malloc( size, 64 );
|
||||||
init_lyra2rev2_4way_ctx();;
|
init_lyra2rev2_8way_ctx();;
|
||||||
#else
|
#else
|
||||||
l2v2_wholeMatrix = _mm_malloc( size, 64 );
|
l2v2_wholeMatrix = _mm_malloc( size, 64 );
|
||||||
init_lyra2rev2_ctx();
|
init_lyra2rev2_ctx();
|
||||||
@@ -109,17 +109,17 @@ bool lyra2rev2_thread_init()
|
|||||||
|
|
||||||
bool register_lyra2rev2_algo( algo_gate_t* gate )
|
bool register_lyra2rev2_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined (LYRA2REV2_8WAY)
|
#if defined (LYRA2REV2_16WAY)
|
||||||
|
gate->scanhash = (void*)&scanhash_lyra2rev2_16way;
|
||||||
|
gate->hash = (void*)&lyra2rev2_16way_hash;
|
||||||
|
#elif defined (LYRA2REV2_8WAY)
|
||||||
gate->scanhash = (void*)&scanhash_lyra2rev2_8way;
|
gate->scanhash = (void*)&scanhash_lyra2rev2_8way;
|
||||||
gate->hash = (void*)&lyra2rev2_8way_hash;
|
gate->hash = (void*)&lyra2rev2_8way_hash;
|
||||||
#elif defined (LYRA2REV2_4WAY)
|
|
||||||
gate->scanhash = (void*)&scanhash_lyra2rev2_4way;
|
|
||||||
gate->hash = (void*)&lyra2rev2_4way_hash;
|
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_lyra2rev2;
|
gate->scanhash = (void*)&scanhash_lyra2rev2;
|
||||||
gate->hash = (void*)&lyra2rev2_hash;
|
gate->hash = (void*)&lyra2rev2_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||||
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
|
gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
|
||||||
opt_target_factor = 256.0;
|
opt_target_factor = 256.0;
|
||||||
return true;
|
return true;
|
||||||
|
@@ -51,30 +51,32 @@ bool init_lyra2rev3_ctx();
|
|||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
#define LYRA2REV2_8WAY 1
|
#define LYRA2REV2_16WAY 1
|
||||||
#elif defined(__AVX2__)
|
#elif defined(__AVX2__)
|
||||||
#define LYRA2REV2_4WAY 1
|
#define LYRA2REV2_8WAY 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern __thread uint64_t* l2v2_wholeMatrix;
|
extern __thread uint64_t* l2v2_wholeMatrix;
|
||||||
|
|
||||||
bool register_lyra2rev2_algo( algo_gate_t* gate );
|
bool register_lyra2rev2_algo( algo_gate_t* gate );
|
||||||
|
|
||||||
#if defined(LYRA2REV2_8WAY)
|
#if defined(LYRA2REV2_16WAY)
|
||||||
|
|
||||||
|
void lyra2rev2_16way_hash( void *state, const void *input );
|
||||||
|
int scanhash_lyra2rev2_16way( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
bool init_lyra2rev2_16way_ctx();
|
||||||
|
|
||||||
|
#elif defined(LYRA2REV2_8WAY)
|
||||||
|
|
||||||
void lyra2rev2_8way_hash( void *state, const void *input );
|
void lyra2rev2_8way_hash( void *state, const void *input );
|
||||||
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
bool init_lyra2rev2_8way_ctx();
|
bool init_lyra2rev2_8way_ctx();
|
||||||
|
|
||||||
#elif defined(LYRA2REV2_4WAY)
|
|
||||||
|
|
||||||
void lyra2rev2_4way_hash( void *state, const void *input );
|
|
||||||
int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
|
||||||
bool init_lyra2rev2_4way_ctx();
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void lyra2rev2_hash( void *state, const void *input );
|
void lyra2rev2_hash( void *state, const void *input );
|
||||||
int scanhash_lyra2rev2( struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2rev2( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
@@ -76,7 +76,7 @@ int scanhash_lyra2h_4way( struct work *work, uint32_t max_nonce,
|
|||||||
&& !opt_benchmark )
|
&& !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||||
|
@@ -7,13 +7,217 @@
|
|||||||
#include "algo/cubehash/cubehash_sse2.h"
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
#include "algo/cubehash/cube-hash-2way.h"
|
#include "algo/cubehash/cube-hash-2way.h"
|
||||||
|
|
||||||
#if defined (LYRA2REV2_8WAY)
|
|
||||||
|
#if defined (LYRA2REV2_16WAY)
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
blake256_16way_context blake;
|
||||||
|
keccak256_8way_context keccak;
|
||||||
|
cubehashParam cube;
|
||||||
|
skein256_8way_context skein;
|
||||||
|
bmw256_16way_context bmw;
|
||||||
|
} lyra2v2_16way_ctx_holder __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
static lyra2v2_16way_ctx_holder l2v2_16way_ctx;
|
||||||
|
|
||||||
|
bool init_lyra2rev2_16way_ctx()
|
||||||
|
{
|
||||||
|
keccak256_8way_init( &l2v2_16way_ctx.keccak );
|
||||||
|
cubehashInit( &l2v2_16way_ctx.cube, 256, 16, 32 );
|
||||||
|
skein256_8way_init( &l2v2_16way_ctx.skein );
|
||||||
|
bmw256_16way_init( &l2v2_16way_ctx.bmw );
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void lyra2rev2_16way_hash( void *state, const void *input )
|
||||||
|
{
|
||||||
|
uint32_t vhash[8*16] __attribute__ ((aligned (128)));
|
||||||
|
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash4[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash5[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash6[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash7[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash8[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash9[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash10[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash11[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash12[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash13[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash14[8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t hash15[8] __attribute__ ((aligned (64)));
|
||||||
|
lyra2v2_16way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||||
|
memcpy( &ctx, &l2v2_16way_ctx, sizeof(l2v2_16way_ctx) );
|
||||||
|
|
||||||
|
blake256_16way_update( &ctx.blake, input + (64<<4), 16 );
|
||||||
|
blake256_16way_close( &ctx.blake, vhash );
|
||||||
|
|
||||||
|
dintrlv_16x32( hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7,
|
||||||
|
hash8, hash9, hash10, hash11,
|
||||||
|
hash12, hash13, hash14, hash15, vhash, 256 );
|
||||||
|
|
||||||
|
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7, 256 );
|
||||||
|
|
||||||
|
keccak256_8way_update( &ctx.keccak, vhash, 32 );
|
||||||
|
keccak256_8way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
|
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||||
|
intrlv_8x64( vhash, hash8, hash9, hash10, hash11,
|
||||||
|
hash12, hash13, hash14, hash15, 256 );
|
||||||
|
|
||||||
|
keccak256_8way_init( &ctx.keccak );
|
||||||
|
keccak256_8way_update( &ctx.keccak, vhash, 32 );
|
||||||
|
keccak256_8way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
|
dintrlv_8x64( hash8, hash9, hash10, hash11,
|
||||||
|
hash12, hash13, hash14, hash5, vhash, 256 );
|
||||||
|
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) hash14, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 );
|
||||||
|
|
||||||
|
|
||||||
|
intrlv_2x256( vhash, hash0, hash1, 256 );
|
||||||
|
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||||
|
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
||||||
|
intrlv_2x256( vhash, hash2, hash3, 256 );
|
||||||
|
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||||
|
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
||||||
|
intrlv_2x256( vhash, hash4, hash5, 256 );
|
||||||
|
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||||
|
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
||||||
|
intrlv_2x256( vhash, hash6, hash7, 256 );
|
||||||
|
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||||
|
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
||||||
|
intrlv_2x256( vhash, hash8, hash9, 256 );
|
||||||
|
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||||
|
dintrlv_2x256( hash8, hash9, vhash, 256 );
|
||||||
|
intrlv_2x256( vhash, hash10, hash11, 256 );
|
||||||
|
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||||
|
dintrlv_2x256( hash10, hash11, vhash, 256 );
|
||||||
|
intrlv_2x256( vhash, hash12, hash13, 256 );
|
||||||
|
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||||
|
dintrlv_2x256( hash12, hash13, vhash, 256 );
|
||||||
|
intrlv_2x256( vhash, hash14, hash15, 256 );
|
||||||
|
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
||||||
|
dintrlv_2x256( hash14, hash15, vhash, 256 );
|
||||||
|
|
||||||
|
intrlv_8x64( vhash, hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7, 256 );
|
||||||
|
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||||
|
skein256_8way_close( &ctx.skein, vhash );
|
||||||
|
|
||||||
|
dintrlv_8x64( hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||||
|
intrlv_8x64( vhash, hash8, hash9, hash10, hash11, hash12,
|
||||||
|
hash13, hash14, hash15, 256 );
|
||||||
|
|
||||||
|
skein256_8way_init( &ctx.skein );
|
||||||
|
skein256_8way_update( &ctx.skein, vhash, 32 );
|
||||||
|
skein256_8way_close( &ctx.skein, vhash );
|
||||||
|
|
||||||
|
dintrlv_8x64( hash8, hash9, hash10, hash11,
|
||||||
|
hash12, hash13, hash14, hash15, vhash, 256 );
|
||||||
|
|
||||||
|
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash8, 256, (const byte*) hash8, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash9, 256, (const byte*) hash9, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash10, 256, (const byte*) hash10, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash11, 256, (const byte*) hash11, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash12, 256, (const byte*) hash12, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash13, 256, (const byte*) hash13, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash14, 256, (const byte*) hash14, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash15, 256, (const byte*) hash15, 32 );
|
||||||
|
|
||||||
|
intrlv_16x32( vhash, hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7,
|
||||||
|
hash8, hash9, hash10, hash11,
|
||||||
|
hash12, hash13, hash14, hash15, 256 );
|
||||||
|
|
||||||
|
bmw256_16way_update( &ctx.bmw, vhash, 32 );
|
||||||
|
bmw256_16way_close( &ctx.bmw, state );
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_lyra2rev2_16way( struct work *work, const uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||||
|
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t *hashd7 = &hash[7*16];
|
||||||
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 16;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
const uint32_t targ32 = ptarget[7];
|
||||||
|
__m512i *noncev = (__m512i*)vdata + 19;
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
|
if ( bench ) ptarget[7] = 0x0000ff;
|
||||||
|
|
||||||
|
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||||
|
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
|
||||||
|
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n );
|
||||||
|
blake256_16way_init( &l2v2_16way_ctx.blake );
|
||||||
|
blake256_16way_update( &l2v2_16way_ctx.blake, vdata, 64 );
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
lyra2rev2_16way_hash( hash, vdata );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 16; lane++ )
|
||||||
|
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||||
|
{
|
||||||
|
extr_lane_16x32( lane_hash, hash, lane, 256 );
|
||||||
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n + lane );
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||||
|
n += 16;
|
||||||
|
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined (LYRA2REV2_8WAY)
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
blake256_8way_context blake;
|
blake256_8way_context blake;
|
||||||
keccak256_8way_context keccak;
|
keccak256_4way_context keccak;
|
||||||
cube_4way_context cube;
|
cubehashParam cube;
|
||||||
skein256_8way_context skein;
|
skein256_4way_context skein;
|
||||||
bmw256_8way_context bmw;
|
bmw256_8way_context bmw;
|
||||||
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
|
} lyra2v2_8way_ctx_holder __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
@@ -21,9 +225,9 @@ static lyra2v2_8way_ctx_holder l2v2_8way_ctx;
|
|||||||
|
|
||||||
bool init_lyra2rev2_8way_ctx()
|
bool init_lyra2rev2_8way_ctx()
|
||||||
{
|
{
|
||||||
keccak256_8way_init( &l2v2_8way_ctx.keccak );
|
keccak256_4way_init( &l2v2_8way_ctx.keccak );
|
||||||
cube_4way_init( &l2v2_8way_ctx.cube, 256, 16, 32 );
|
cubehashInit( &l2v2_8way_ctx.cube, 256, 16, 32 );
|
||||||
skein256_8way_init( &l2v2_8way_ctx.skein );
|
skein256_4way_init( &l2v2_8way_ctx.skein );
|
||||||
bmw256_8way_init( &l2v2_8way_ctx.bmw );
|
bmw256_8way_init( &l2v2_8way_ctx.bmw );
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -31,8 +235,6 @@ bool init_lyra2rev2_8way_ctx()
|
|||||||
void lyra2rev2_8way_hash( void *state, const void *input )
|
void lyra2rev2_8way_hash( void *state, const void *input )
|
||||||
{
|
{
|
||||||
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t vhashA[8*8] __attribute__ ((aligned (64)));
|
|
||||||
uint32_t vhashB[8*8] __attribute__ ((aligned (64)));
|
|
||||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||||
@@ -47,103 +249,113 @@ void lyra2rev2_8way_hash( void *state, const void *input )
|
|||||||
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
|
blake256_8way_update( &ctx.blake, input + (64<<3), 16 );
|
||||||
blake256_8way_close( &ctx.blake, vhash );
|
blake256_8way_close( &ctx.blake, vhash );
|
||||||
|
|
||||||
rintrlv_8x32_8x64( vhashA, vhash, 256 );
|
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||||
|
|
||||||
keccak256_8way_update( &ctx.keccak, vhashA, 32 );
|
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||||
keccak256_8way_close( &ctx.keccak, vhash );
|
keccak256_4way_update( &ctx.keccak, vhash, 32 );
|
||||||
|
keccak256_4way_close( &ctx.keccak, vhash );
|
||||||
|
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||||
|
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
|
||||||
|
keccak256_4way_init( &ctx.keccak );
|
||||||
|
keccak256_4way_update( &ctx.keccak, vhash, 32 );
|
||||||
|
keccak256_4way_close( &ctx.keccak, vhash );
|
||||||
|
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
|
||||||
|
|
||||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
|
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||||
|
|
||||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
|
||||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
|
||||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
|
||||||
|
|
||||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
LYRA2REV2( l2v2_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
LYRA2REV2( l2v2_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV2( l2v2_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV2( l2v2_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV2( l2v2_wholeMatrix, hash4, 32, hash4, 32, hash4, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV2( l2v2_wholeMatrix, hash5, 32, hash5, 32, hash5, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV2( l2v2_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 );
|
||||||
|
LYRA2REV2( l2v2_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
|
||||||
|
|
||||||
intrlv_2x256( vhash, hash0, hash1, 256 );
|
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
skein256_4way_update( &ctx.skein, vhash, 32 );
|
||||||
dintrlv_2x256( hash0, hash1, vhash, 256 );
|
skein256_4way_close( &ctx.skein, vhash );
|
||||||
intrlv_2x256( vhash, hash2, hash3, 256 );
|
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
intrlv_4x64( vhash, hash4, hash5, hash6, hash7, 256 );
|
||||||
dintrlv_2x256( hash2, hash3, vhash, 256 );
|
skein256_4way_init( &ctx.skein );
|
||||||
intrlv_2x256( vhash, hash4, hash5, 256 );
|
skein256_4way_update( &ctx.skein, vhash, 32 );
|
||||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
skein256_4way_close( &ctx.skein, vhash );
|
||||||
dintrlv_2x256( hash4, hash5, vhash, 256 );
|
dintrlv_4x64( hash4, hash5, hash6, hash7, vhash, 256 );
|
||||||
intrlv_2x256( vhash, hash6, hash7, 256 );
|
|
||||||
LYRA2REV2_2WAY( l2v2_wholeMatrix, vhash, 32, vhash, 32, 1, 4, 4 );
|
|
||||||
dintrlv_2x256( hash6, hash7, vhash, 256 );
|
|
||||||
|
|
||||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||||
hash7, 256 );
|
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash2, 256, (const byte*) hash2, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash3, 256, (const byte*) hash3, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash4, 256, (const byte*) hash4, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash5, 256, (const byte*) hash5, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash6, 256, (const byte*) hash6, 32 );
|
||||||
|
cubehash_full( &ctx.cube, (byte*) hash7, 256, (const byte*) hash7, 32 );
|
||||||
|
|
||||||
skein256_8way_update( &ctx.skein, vhash, 32 );
|
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
|
||||||
skein256_8way_close( &ctx.skein, vhash );
|
hash4, hash5, hash6, hash7, 256 );
|
||||||
|
|
||||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 256 );
|
|
||||||
|
|
||||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
|
||||||
cube_4way_update_close( &ctx.cube, vhashA, vhashA, 32 );
|
|
||||||
cube_4way_init( &ctx.cube, 256, 16, 32 );
|
|
||||||
cube_4way_update_close( &ctx.cube, vhashB, vhashB, 32 );
|
|
||||||
|
|
||||||
dintrlv_4x128( hash0, hash1, hash2, hash3, vhashA, 256 );
|
|
||||||
dintrlv_4x128( hash4, hash5, hash6, hash7, vhashB, 256 );
|
|
||||||
|
|
||||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
|
||||||
hash7, 256 );
|
|
||||||
|
|
||||||
bmw256_8way_update( &ctx.bmw, vhash, 32 );
|
bmw256_8way_update( &ctx.bmw, vhash, 32 );
|
||||||
bmw256_8way_close( &ctx.bmw, state );
|
bmw256_8way_close( &ctx.bmw, state );
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_lyra2rev2_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2rev2_8way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &(hash[7<<3]);
|
uint32_t *hashd7 = &hash[7*8];
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const uint32_t last_nonce = max_nonce - 8;
|
const uint32_t last_nonce = max_nonce - 8;
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t targ32 = ptarget[7];
|
||||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
__m256i *noncev = (__m256i*)vdata + 19;
|
||||||
int thr_id = mythr->id;
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( bench ) ptarget[7] = 0x0000ff;
|
||||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
|
||||||
|
|
||||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||||
|
*noncev = _mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n );
|
||||||
blake256_8way_init( &l2v2_8way_ctx.blake );
|
blake256_8way_init( &l2v2_8way_ctx.blake );
|
||||||
blake256_8way_update( &l2v2_8way_ctx.blake, vdata, 64 );
|
blake256_8way_update( &l2v2_8way_ctx.blake, vdata, 64 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
|
||||||
n+3, n+2, n+1, n ) );
|
|
||||||
|
|
||||||
lyra2rev2_8way_hash( hash, vdata );
|
lyra2rev2_8way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
|
||||||
|
|
||||||
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
|
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||||
{
|
{
|
||||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||||
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce;
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
#elif defined (LYRA2REV2_4WAY)
|
#elif defined (LYRA2REV2_4WAY)
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -226,15 +438,16 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &(hash[7<<2]);
|
uint32_t *hashd7 = &(hash[7<<2]);
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 4;
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t targ32 = ptarget[7];
|
||||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
__m128i *noncev = (__m128i*)vdata + 19;
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
int thr_id = mythr->id;
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||||
@@ -249,21 +462,22 @@ int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||||
|
|
||||||
lyra2rev2_4way_hash( hash, vdata );
|
lyra2rev2_4way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
|
||||||
|
|
||||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
|
||||||
{
|
{
|
||||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||||
*hashes_done = n - first_nonce + 1;
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
*/
|
||||||
|
@@ -99,7 +99,7 @@ int scanhash_lyra2rev2( struct work *work,
|
|||||||
lyra2rev2_hash(hash, endiandata);
|
lyra2rev2_hash(hash, endiandata);
|
||||||
|
|
||||||
if (hash[7] <= Htarg )
|
if (hash[7] <= Htarg )
|
||||||
if( fulltest( hash, ptarget ) && !opt_benchmark )
|
if( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = nonce;
|
pdata[19] = nonce;
|
||||||
submit_solution( work, hash, mythr );
|
submit_solution( work, hash, mythr );
|
||||||
|
@@ -130,15 +130,15 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
uint32_t hash[8*16] __attribute__ ((aligned (128)));
|
||||||
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*16] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &hash[7<<4];
|
uint32_t *hashd7 = &hash[7*16];
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
const uint32_t last_nonce = max_nonce - 16;
|
const uint32_t last_nonce = max_nonce - 16;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t targ32 = ptarget[7];
|
||||||
__m512i *noncev = (__m512i*)vdata + 19; // aligned
|
__m512i *noncev = (__m512i*)vdata + 19;
|
||||||
const int thr_id = mythr->id;
|
const int thr_id = mythr->id;
|
||||||
|
|
||||||
if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
|
if ( opt_benchmark ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||||
@@ -159,17 +159,18 @@ int scanhash_lyra2rev3_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
for ( int lane = 0; lane < 16; lane++ )
|
for ( int lane = 0; lane < 16; lane++ )
|
||||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||||
{
|
{
|
||||||
extr_lane_16x32( lane_hash, hash, lane, 256 );
|
extr_lane_16x32( lane_hash, hash, lane, 256 );
|
||||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 16;
|
n += 16;
|
||||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||||
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce;
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -194,7 +195,7 @@ bool init_lyra2rev3_8way_ctx()
|
|||||||
|
|
||||||
void lyra2rev3_8way_hash( void *state, const void *input )
|
void lyra2rev3_8way_hash( void *state, const void *input )
|
||||||
{
|
{
|
||||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
uint32_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||||
@@ -250,17 +251,17 @@ void lyra2rev3_8way_hash( void *state, const void *input )
|
|||||||
int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &hash[7<<3];
|
uint32_t *hashd7 = &hash[7*8];
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const uint32_t last_nonce = max_nonce - 8;
|
const uint32_t last_nonce = max_nonce - 8;
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t targ32 = ptarget[7];
|
||||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
__m256i *noncev = (__m256i*)vdata + 19;
|
||||||
const int thr_id = mythr->id;
|
const int thr_id = mythr->id;
|
||||||
const bool bench = opt_benchmark;
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
@@ -277,13 +278,13 @@ int scanhash_lyra2rev3_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
|
|
||||||
for ( int lane = 0; lane < 8; lane++ )
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
if ( unlikely( hashd7[lane] <= targ32 ) )
|
||||||
{
|
{
|
||||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n + lane );
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||||
@@ -357,42 +358,41 @@ int scanhash_lyra2rev3_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
{
|
{
|
||||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &(hash[7<<2]);
|
uint32_t *hashd7 = &(hash[7*4]);
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t targ32 = ptarget[7];
|
||||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
__m128i *noncev = (__m128i*)vdata + 19;
|
||||||
const int thr_id = mythr->id; // thr_id arg is deprecated
|
const int thr_id = mythr->id;
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( opt_benchmark )
|
||||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||||
|
|
||||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||||
|
*noncev = _mm_set_epi32( n+3, n+2, n+1, n );
|
||||||
|
|
||||||
blake256_4way_init( &l2v3_4way_ctx.blake );
|
blake256_4way_init( &l2v3_4way_ctx.blake );
|
||||||
blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );
|
blake256_4way_update( &l2v3_4way_ctx.blake, vdata, 64 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
|
||||||
|
|
||||||
lyra2rev3_4way_hash( hash, vdata );
|
lyra2rev3_4way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
for ( int lane = 0; lane < 4; lane++ ) if ( hashd7[lane] <= targ32 )
|
||||||
|
|
||||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
|
||||||
{
|
{
|
||||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( valid_hash( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||||
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@@ -88,7 +88,7 @@ int scanhash_lyra2rev3( struct work *work,
|
|||||||
lyra2rev3_hash(hash, endiandata);
|
lyra2rev3_hash(hash, endiandata);
|
||||||
|
|
||||||
if (hash[7] <= Htarg )
|
if (hash[7] <= Htarg )
|
||||||
if( fulltest( hash, ptarget ) && !opt_benchmark )
|
if( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = nonce;
|
pdata[19] = nonce;
|
||||||
submit_solution( work, hash, mythr );
|
submit_solution( work, hash, mythr );
|
||||||
|
@@ -124,7 +124,7 @@ int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n + lane );
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
*noncev = _mm512_add_epi32( *noncev, m512_const1_32( 16 ) );
|
||||||
@@ -222,7 +222,7 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n + lane );
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
*noncev = _mm256_add_epi32( *noncev, m256_const1_32( 8 ) );
|
||||||
@@ -301,7 +301,7 @@ int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n + lane );
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
|
*noncev = _mm_add_epi32( *noncev, m128_const1_32( 4 ) );
|
||||||
|
@@ -56,7 +56,7 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
|||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
int thr_id = mythr->id;
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
ptarget[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
@@ -71,8 +71,7 @@ int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
|||||||
be32enc(&endiandata[19], nonce);
|
be32enc(&endiandata[19], nonce);
|
||||||
lyra2z_hash( hash, endiandata );
|
lyra2z_hash( hash, endiandata );
|
||||||
|
|
||||||
if ( hash[7] <= Htarg )
|
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
|
||||||
{
|
{
|
||||||
pdata[19] = nonce;
|
pdata[19] = nonce;
|
||||||
submit_solution( work, hash, mythr );
|
submit_solution( work, hash, mythr );
|
||||||
|
@@ -18,38 +18,40 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
|
|||||||
int scanhash_lyra2z330( struct work *work, uint32_t max_nonce,
|
int scanhash_lyra2z330( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
uint32_t hash[8] __attribute__ ((aligned (128)));
|
||||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
const int thr_id = mythr->id;
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
ptarget[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
|
|
||||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
be32enc( &endiandata[19], nonce );
|
edata[19] = nonce;
|
||||||
lyra2z330_hash( hash, endiandata, work->height );
|
|
||||||
if ( hash[7] <= Htarg )
|
LYRA2Z( lyra2z330_wholeMatrix, hash, 32, edata, 80, edata, 80,
|
||||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
2, 330, 256 );
|
||||||
|
|
||||||
|
// lyra2z330_hash( hash, edata, work->height );
|
||||||
|
if ( valid_hash( hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = nonce;
|
be32enc( pdata + 19, nonce );
|
||||||
submit_solution( work, hash, mythr );
|
submit_solution( work, hash, mythr );
|
||||||
}
|
}
|
||||||
nonce++;
|
nonce++;
|
||||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||||
pdata[19] = nonce;
|
pdata[19] = nonce;
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = nonce - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -66,7 +68,7 @@ bool lyra2z330_thread_init()
|
|||||||
|
|
||||||
bool register_lyra2z330_algo( algo_gate_t* gate )
|
bool register_lyra2z330_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||||
gate->miner_thread_init = (void*)&lyra2z330_thread_init;
|
gate->miner_thread_init = (void*)&lyra2z330_thread_init;
|
||||||
gate->scanhash = (void*)&scanhash_lyra2z330;
|
gate->scanhash = (void*)&scanhash_lyra2z330;
|
||||||
gate->hash = (void*)&lyra2z330_hash;
|
gate->hash = (void*)&lyra2z330_hash;
|
||||||
|
@@ -302,7 +302,7 @@ int scanhash_phi2_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( valid_hash( lane_hash, ptarget ) )
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
be32enc( pdata + 19, n + lane );
|
be32enc( pdata + 19, n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
@@ -483,7 +483,7 @@ int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( valid_hash( lane_hash, ptarget ) )
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
be32enc( pdata + 19, n + lane );
|
be32enc( pdata + 19, n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
edata[ 19 ] += 4;
|
edata[ 19 ] += 4;
|
||||||
|
@@ -311,7 +311,7 @@ bool register_m7m_algo( algo_gate_t *gate )
|
|||||||
{
|
{
|
||||||
gate->optimizations = SHA_OPT;
|
gate->optimizations = SHA_OPT;
|
||||||
init_m7m_ctx();
|
init_m7m_ctx();
|
||||||
gate->scanhash = (void*)scanhash_m7m_hash;
|
gate->scanhash = (void*)&scanhash_m7m_hash;
|
||||||
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
|
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
|
||||||
gate->work_decode = (void*)&std_be_work_decode;
|
gate->work_decode = (void*)&std_be_work_decode;
|
||||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||||
|
@@ -108,7 +108,7 @@ int scanhash_nist5_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
@@ -196,7 +196,7 @@ int scanhash_nist5_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
@@ -156,6 +156,8 @@ int scanhash_zr5( struct work *work, uint32_t max_nonce,
|
|||||||
void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||||
uint32_t* end_nonce_ptr )
|
uint32_t* end_nonce_ptr )
|
||||||
{
|
{
|
||||||
|
pthread_mutex_lock( &g_work_lock );
|
||||||
|
|
||||||
// ignore POK in first word
|
// ignore POK in first word
|
||||||
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
|
const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
|
||||||
uint32_t *nonceptr = work->data + algo_gate.nonce_index;
|
uint32_t *nonceptr = work->data + algo_gate.nonce_index;
|
||||||
@@ -171,6 +173,8 @@ void zr5_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
++(*nonceptr);
|
++(*nonceptr);
|
||||||
|
|
||||||
|
pthread_mutex_unlock( &g_work_lock );
|
||||||
}
|
}
|
||||||
|
|
||||||
void zr5_display_pok( struct work* work )
|
void zr5_display_pok( struct work* work )
|
||||||
|
@@ -1,18 +1,241 @@
|
|||||||
#include "cpuminer-config.h"
|
#include "cpuminer-config.h"
|
||||||
#include "anime-gate.h"
|
#include "anime-gate.h"
|
||||||
|
|
||||||
#if defined (ANIME_4WAY)
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "algo/blake/blake-hash-4way.h"
|
#include "algo/blake/blake-hash-4way.h"
|
||||||
#include "algo/bmw/bmw-hash-4way.h"
|
#include "algo/bmw/bmw-hash-4way.h"
|
||||||
#include "algo/skein/skein-hash-4way.h"
|
#include "algo/skein/skein-hash-4way.h"
|
||||||
#include "algo/jh/jh-hash-4way.h"
|
#include "algo/jh/jh-hash-4way.h"
|
||||||
#include "algo/keccak/keccak-hash-4way.h"
|
#include "algo/keccak/keccak-hash-4way.h"
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
|
#if defined(__VAES__)
|
||||||
|
#include "algo/groestl/groestl512-hash-4way.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined (ANIME_8WAY)
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
blake512_8way_context blake;
|
||||||
|
bmw512_8way_context bmw;
|
||||||
|
#if defined(__VAES__)
|
||||||
|
groestl512_4way_context groestl;
|
||||||
|
#else
|
||||||
|
hashState_groestl groestl;
|
||||||
|
#endif
|
||||||
|
jh512_8way_context jh;
|
||||||
|
skein512_8way_context skein;
|
||||||
|
keccak512_8way_context keccak;
|
||||||
|
} anime_8way_ctx_holder;
|
||||||
|
|
||||||
|
anime_8way_ctx_holder anime_8way_ctx __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
void init_anime_8way_ctx()
|
||||||
|
{
|
||||||
|
blake512_8way_init( &anime_8way_ctx.blake );
|
||||||
|
bmw512_8way_init( &anime_8way_ctx.bmw );
|
||||||
|
#if defined(__VAES__)
|
||||||
|
groestl512_4way_init( &anime_8way_ctx.groestl, 64 );
|
||||||
|
#else
|
||||||
|
init_groestl( &anime_8way_ctx.groestl, 64 );
|
||||||
|
#endif
|
||||||
|
skein512_8way_init( &anime_8way_ctx.skein );
|
||||||
|
jh512_8way_init( &anime_8way_ctx.jh );
|
||||||
|
keccak512_8way_init( &anime_8way_ctx.keccak );
|
||||||
|
}
|
||||||
|
|
||||||
|
void anime_8way_hash( void *state, const void *input )
|
||||||
|
{
|
||||||
|
uint64_t vhash[8*8] __attribute__ ((aligned (128)));
|
||||||
|
uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
|
||||||
|
uint64_t vhashB[8*8] __attribute__ ((aligned (64)));
|
||||||
|
uint64_t vhashC[8*8] __attribute__ ((aligned (64)));
|
||||||
|
#if !defined(__VAES__)
|
||||||
|
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||||
|
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||||
|
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||||
|
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||||
|
uint64_t hash4[8] __attribute__ ((aligned (64)));
|
||||||
|
uint64_t hash5[8] __attribute__ ((aligned (64)));
|
||||||
|
uint64_t hash6[8] __attribute__ ((aligned (64)));
|
||||||
|
uint64_t hash7[8] __attribute__ ((aligned (64)));
|
||||||
|
#endif
|
||||||
|
__m512i* vh = (__m512i*)vhash;
|
||||||
|
__m512i* vhA = (__m512i*)vhashA;
|
||||||
|
__m512i* vhB = (__m512i*)vhashB;
|
||||||
|
__m512i* vhC = (__m512i*)vhashC;
|
||||||
|
const __m512i bit3_mask = m512_const1_64( 8 );
|
||||||
|
const __m512i zero = _mm512_setzero_si512();
|
||||||
|
__mmask8 vh_mask;
|
||||||
|
anime_8way_ctx_holder ctx;
|
||||||
|
memcpy( &ctx, &anime_8way_ctx, sizeof(anime_8way_ctx) );
|
||||||
|
|
||||||
|
bmw512_8way_full( &ctx.bmw, vhash, input, 80 );
|
||||||
|
|
||||||
|
blake512_8way_full( &ctx.blake, vhash, vhash, 64 );
|
||||||
|
|
||||||
|
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||||
|
zero );
|
||||||
|
|
||||||
|
#if defined(__VAES__)
|
||||||
|
|
||||||
|
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||||
|
|
||||||
|
if ( ( vh_mask & 0x0f ) != 0x0f )
|
||||||
|
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||||
|
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
||||||
|
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||||
|
|
||||||
|
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7, vhash );
|
||||||
|
|
||||||
|
if ( hash0[0] & 8 )
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
|
if ( hash1[0] & 8 )
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||||
|
if ( hash2[0] & 8)
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||||
|
if ( hash3[0] & 8 )
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
if ( hash4[0] & 8 )
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||||
|
if ( hash5[0] & 8 )
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||||
|
if ( hash6[0] & 8 )
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||||
|
if ( hash7[0] & 8 )
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||||
|
|
||||||
|
intrlv_8x64_512( vhashC, hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7 );
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ( vh_mask & 0xff )
|
||||||
|
skein512_8way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||||
|
|
||||||
|
mm512_blend_hash_8x64( vh, vhC, vhB, vh_mask );
|
||||||
|
|
||||||
|
#if defined(__VAES__)
|
||||||
|
|
||||||
|
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||||
|
|
||||||
|
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||||
|
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||||
|
|
||||||
|
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
dintrlv_8x64_512( hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7, vhash );
|
||||||
|
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||||
|
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||||
|
|
||||||
|
intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
|
||||||
|
hash4, hash5, hash6, hash7 );
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
jh512_8way_init( &ctx.jh );
|
||||||
|
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||||
|
jh512_8way_close( &ctx.jh, vhash );
|
||||||
|
|
||||||
|
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||||
|
zero );
|
||||||
|
|
||||||
|
if ( ( vh_mask & 0xff ) != 0xff )
|
||||||
|
blake512_8way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||||
|
if ( vh_mask & 0xff )
|
||||||
|
bmw512_8way_full( &ctx.bmw, vhashB, vhash, 64 );
|
||||||
|
|
||||||
|
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
|
keccak512_8way_init( &ctx.keccak );
|
||||||
|
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||||
|
keccak512_8way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
|
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||||
|
|
||||||
|
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||||
|
zero );
|
||||||
|
|
||||||
|
if ( ( vh_mask & 0xff ) != 0xff )
|
||||||
|
{
|
||||||
|
keccak512_8way_init( &ctx.keccak );
|
||||||
|
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||||
|
keccak512_8way_close( &ctx.keccak, vhashA );
|
||||||
|
}
|
||||||
|
if ( vh_mask & 0xff )
|
||||||
|
{
|
||||||
|
jh512_8way_init( &ctx.jh );
|
||||||
|
jh512_8way_update( &ctx.jh, vhash, 64 );
|
||||||
|
jh512_8way_close( &ctx.jh, vhashB );
|
||||||
|
}
|
||||||
|
|
||||||
|
casti_m512i( state,0 ) = _mm512_mask_blend_epi64( vh_mask, vhA[0], vhB[0] );
|
||||||
|
casti_m512i( state,1 ) = _mm512_mask_blend_epi64( vh_mask, vhA[1], vhB[1] );
|
||||||
|
casti_m512i( state,2 ) = _mm512_mask_blend_epi64( vh_mask, vhA[2], vhB[2] );
|
||||||
|
casti_m512i( state,3 ) = _mm512_mask_blend_epi64( vh_mask, vhA[3], vhB[3] );
|
||||||
|
}
|
||||||
|
|
||||||
|
int scanhash_anime_8way( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
uint64_t hash64[4*8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
|
uint64_t *hash64_q3 = &(hash64[3*8]);
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||||
|
uint32_t n = pdata[19];
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 8;
|
||||||
|
__m512i *noncev = (__m512i*)vdata + 9;
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
|
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||||
|
*noncev = mm512_intrlv_blend_32(
|
||||||
|
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||||
|
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
anime_8way_hash( hash64, vdata );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
|
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
|
||||||
|
{
|
||||||
|
extr_lane_8x64( lane_hash, hash64, lane, 256 );
|
||||||
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n + lane );
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
|
m512_const1_64( 0x0000000800000000 ) );
|
||||||
|
n += 8;
|
||||||
|
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined (ANIME_4WAY)
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
blake512_4way_context blake;
|
blake512_4way_context blake;
|
||||||
@@ -23,18 +246,6 @@ typedef struct {
|
|||||||
keccak512_4way_context keccak;
|
keccak512_4way_context keccak;
|
||||||
} anime_4way_ctx_holder;
|
} anime_4way_ctx_holder;
|
||||||
|
|
||||||
anime_4way_ctx_holder anime_4way_ctx __attribute__ ((aligned (64)));
|
|
||||||
|
|
||||||
void init_anime_4way_ctx()
|
|
||||||
{
|
|
||||||
blake512_4way_init( &anime_4way_ctx.blake );
|
|
||||||
bmw512_4way_init( &anime_4way_ctx.bmw );
|
|
||||||
init_groestl( &anime_4way_ctx.groestl, 64 );
|
|
||||||
skein512_4way_init( &anime_4way_ctx.skein );
|
|
||||||
jh512_4way_init( &anime_4way_ctx.jh );
|
|
||||||
keccak512_4way_init( &anime_4way_ctx.keccak );
|
|
||||||
}
|
|
||||||
|
|
||||||
void anime_4way_hash( void *state, const void *input )
|
void anime_4way_hash( void *state, const void *input )
|
||||||
{
|
{
|
||||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||||
@@ -48,81 +259,61 @@ void anime_4way_hash( void *state, const void *input )
|
|||||||
__m256i* vhA = (__m256i*)vhashA;
|
__m256i* vhA = (__m256i*)vhashA;
|
||||||
__m256i* vhB = (__m256i*)vhashB;
|
__m256i* vhB = (__m256i*)vhashB;
|
||||||
__m256i vh_mask;
|
__m256i vh_mask;
|
||||||
const uint32_t mask = 8;
|
int h_mask;
|
||||||
const __m256i bit3_mask = m256_const1_64( 8 );
|
const __m256i bit3_mask = m256_const1_64( 8 );
|
||||||
const __m256i zero = _mm256_setzero_si256();
|
const __m256i zero = _mm256_setzero_si256();
|
||||||
anime_4way_ctx_holder ctx;
|
anime_4way_ctx_holder ctx;
|
||||||
memcpy( &ctx, &anime_4way_ctx, sizeof(anime_4way_ctx) );
|
|
||||||
|
|
||||||
|
bmw512_4way_init( &ctx.bmw );
|
||||||
bmw512_4way_update( &ctx.bmw, input, 80 );
|
bmw512_4way_update( &ctx.bmw, input, 80 );
|
||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
blake512_4way_update( &ctx.blake, vhash, 64 );
|
blake512_4way_full( &ctx.blake, vhash, vhash, 64 );
|
||||||
blake512_4way_close( &ctx.blake, vhash );
|
|
||||||
|
|
||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||||
|
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||||
|
|
||||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
if ( hash0[0] & mask )
|
// A
|
||||||
{
|
if ( hash0[0] & 8 )
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
(char*)hash0, 512 );
|
if ( hash1[0] & 8 )
|
||||||
}
|
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||||
if ( hash1[0] & mask )
|
if ( hash2[0] & 8)
|
||||||
{
|
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
if ( hash3[0] & 8 )
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
(char*)hash1, 512 );
|
|
||||||
}
|
|
||||||
if ( hash2[0] & mask )
|
|
||||||
{
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
|
||||||
(char*)hash2, 512 );
|
|
||||||
}
|
|
||||||
if ( hash3[0] & mask )
|
|
||||||
{
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
|
||||||
(char*)hash3, 512 );
|
|
||||||
}
|
|
||||||
|
|
||||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
if ( mm256_anybits0( vh_mask ) )
|
// B
|
||||||
{
|
if ( h_mask & 0xffffffff )
|
||||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
skein512_4way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||||
skein512_4way_close( &ctx.skein, vhashB );
|
|
||||||
}
|
|
||||||
|
|
||||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
reinit_groestl( &ctx.groestl );
|
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
|
||||||
|
|
||||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
|
jh512_4way_init( &ctx.jh );
|
||||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||||
jh512_4way_close( &ctx.jh, vhash );
|
jh512_4way_close( &ctx.jh, vhash );
|
||||||
|
|
||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||||
|
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||||
|
|
||||||
if ( mm256_anybits1( vh_mask ) )
|
// A
|
||||||
{
|
if ( ( h_mask & 0xffffffff ) != 0xffffffff )
|
||||||
blake512_4way_init( &ctx.blake );
|
blake512_4way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||||
blake512_4way_update( &ctx.blake, vhash, 64 );
|
// B
|
||||||
blake512_4way_close( &ctx.blake, vhashA );
|
if ( h_mask & 0xffffffff )
|
||||||
}
|
|
||||||
if ( mm256_anybits0( vh_mask ) )
|
|
||||||
{
|
{
|
||||||
bmw512_4way_init( &ctx.bmw );
|
bmw512_4way_init( &ctx.bmw );
|
||||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||||
@@ -131,64 +322,74 @@ void anime_4way_hash( void *state, const void *input )
|
|||||||
|
|
||||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
|
keccak512_4way_init( &ctx.keccak );
|
||||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
skein512_4way_init( &ctx.skein );
|
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
|
||||||
skein512_4way_close( &ctx.skein, vhash );
|
|
||||||
|
|
||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||||
|
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||||
|
|
||||||
if ( mm256_anybits1( vh_mask ) )
|
// A
|
||||||
|
if ( ( h_mask & 0xffffffff ) != 0xffffffff )
|
||||||
{
|
{
|
||||||
keccak512_4way_init( &ctx.keccak );
|
keccak512_4way_init( &ctx.keccak );
|
||||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||||
}
|
}
|
||||||
if ( mm256_anybits0( vh_mask ) )
|
// B
|
||||||
|
if ( h_mask & 0xffffffff )
|
||||||
{
|
{
|
||||||
jh512_4way_init( &ctx.jh );
|
jh512_4way_init( &ctx.jh );
|
||||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||||
jh512_4way_close( &ctx.jh, vhashB );
|
jh512_4way_close( &ctx.jh, vhashB );
|
||||||
}
|
}
|
||||||
|
|
||||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
casti_m256i( state, 0 ) = _mm256_blendv_epi8( vhA[0], vhB[0], vh_mask );
|
||||||
|
casti_m256i( state, 1 ) = _mm256_blendv_epi8( vhA[1], vhB[1], vh_mask );
|
||||||
dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
casti_m256i( state, 2 ) = _mm256_blendv_epi8( vhA[2], vhB[2], vh_mask );
|
||||||
|
casti_m256i( state, 3 ) = _mm256_blendv_epi8( vhA[3], vhB[3], vh_mask );
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
uint64_t hash64[4*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
|
uint64_t *hash64_q3 = &(hash64[3*4]);
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const uint32_t last_nonce = max_nonce - 4;
|
const uint32_t last_nonce = max_nonce - 4;
|
||||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
__m256i *noncev = (__m256i*)vdata + 9;
|
||||||
const int thr_id = mythr->id;
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||||
*noncev = mm256_intrlv_blend_32(
|
*noncev = mm256_intrlv_blend_32(
|
||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
anime_4way_hash( hash, vdata );
|
anime_4way_hash( hash64, vdata );
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int lane = 0; lane < 4; lane++ )
|
||||||
if ( valid_hash( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n+i );
|
extr_lane_4x64( lane_hash, hash64, lane, 256 );
|
||||||
submit_solution( work, hash+(i<<3), mythr );
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
|
{
|
||||||
|
pdata[19] = bswap_32( n + lane );
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev,
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
m256_const1_64( 0x0000000400000000 ) );
|
m256_const1_64( 0x0000000400000000 ) );
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce;
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
|
@@ -2,8 +2,10 @@
|
|||||||
|
|
||||||
bool register_anime_algo( algo_gate_t* gate )
|
bool register_anime_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined (ANIME_4WAY)
|
#if defined (ANIME_8WAY)
|
||||||
init_anime_4way_ctx();
|
gate->scanhash = (void*)&scanhash_anime_8way;
|
||||||
|
gate->hash = (void*)&anime_8way_hash;
|
||||||
|
#elif defined (ANIME_4WAY)
|
||||||
gate->scanhash = (void*)&scanhash_anime_4way;
|
gate->scanhash = (void*)&scanhash_anime_4way;
|
||||||
gate->hash = (void*)&anime_4way_hash;
|
gate->hash = (void*)&anime_4way_hash;
|
||||||
#else
|
#else
|
||||||
@@ -11,7 +13,7 @@ bool register_anime_algo( algo_gate_t* gate )
|
|||||||
gate->scanhash = (void*)&scanhash_anime;
|
gate->scanhash = (void*)&scanhash_anime;
|
||||||
gate->hash = (void*)&anime_hash;
|
gate->hash = (void*)&anime_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -4,18 +4,25 @@
|
|||||||
#include "algo-gate-api.h"
|
#include "algo-gate-api.h"
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#if defined(__AVX2__) && defined(__AES__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
#define ANIME_4WAY
|
#define ANIME_8WAY 1
|
||||||
|
#elif defined(__AVX2__) && defined(__AES__)
|
||||||
|
#define ANIME_4WAY 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool register_anime_algo( algo_gate_t* gate );
|
bool register_anime_algo( algo_gate_t* gate );
|
||||||
|
|
||||||
#if defined(ANIME_4WAY)
|
#if defined(ANIME_8WAY)
|
||||||
|
|
||||||
|
void anime_8way_hash( void *state, const void *input );
|
||||||
|
int scanhash_anime_8way( struct work *work, uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
|
#elif defined(ANIME_4WAY)
|
||||||
|
|
||||||
void anime_4way_hash( void *state, const void *input );
|
void anime_4way_hash( void *state, const void *input );
|
||||||
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
void init_anime_4way_ctx();
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -72,11 +72,9 @@ void quark_8way_hash( void *state, const void *input )
|
|||||||
|
|
||||||
memcpy( &ctx, &quark_8way_ctx, sizeof(quark_8way_ctx) );
|
memcpy( &ctx, &quark_8way_ctx, sizeof(quark_8way_ctx) );
|
||||||
|
|
||||||
blake512_8way_update( &ctx.blake, input, 80 );
|
blake512_8way_full( &ctx.blake, vhash, input, 80 );
|
||||||
blake512_8way_close( &ctx.blake, vhash );
|
|
||||||
|
|
||||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
|
||||||
bmw512_8way_close( &ctx.bmw, vhash );
|
|
||||||
|
|
||||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||||
zero );
|
zero );
|
||||||
@@ -87,15 +85,10 @@ void quark_8way_hash( void *state, const void *input )
|
|||||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||||
|
|
||||||
if ( ( vh_mask & 0x0f ) != 0x0f )
|
if ( ( vh_mask & 0x0f ) != 0x0f )
|
||||||
{
|
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||||
groestl512_4way_init( &ctx.groestl, 64 );
|
|
||||||
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
|
|
||||||
}
|
|
||||||
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
||||||
{
|
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||||
groestl512_4way_init( &ctx.groestl, 64 );
|
|
||||||
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
|
|
||||||
}
|
|
||||||
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
|
rintrlv_4x128_8x64( vhashC, vhashA, vhashB, 512 );
|
||||||
|
|
||||||
#else
|
#else
|
||||||
@@ -103,53 +96,22 @@ void quark_8way_hash( void *state, const void *input )
|
|||||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||||
vhash, 512 );
|
vhash, 512 );
|
||||||
|
|
||||||
if ( hash0[0] & mask )
|
if ( hash0[0] & 8 )
|
||||||
{
|
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
if ( hash1[0] & 8 )
|
||||||
(char*)hash0, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||||
}
|
if ( hash2[0] & 8)
|
||||||
if ( hash1[0] & mask )
|
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||||
{
|
if ( hash3[0] & 8 )
|
||||||
reinit_groestl( &ctx.groestl );
|
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
if ( hash4[0] & 8 )
|
||||||
(char*)hash1, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||||
}
|
if ( hash5[0] & 8 )
|
||||||
if ( hash2[0] & mask )
|
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||||
{
|
if ( hash6[0] & 8 )
|
||||||
reinit_groestl( &ctx.groestl );
|
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
if ( hash7[0] & 8 )
|
||||||
(char*)hash2, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||||
}
|
|
||||||
if ( hash3[0] & mask )
|
|
||||||
{
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
|
||||||
(char*)hash3, 512 );
|
|
||||||
}
|
|
||||||
if ( hash4[0] & mask )
|
|
||||||
{
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash4,
|
|
||||||
(char*)hash4, 512 );
|
|
||||||
}
|
|
||||||
if ( hash5[0] & mask )
|
|
||||||
{
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash5,
|
|
||||||
(char*)hash5, 512 );
|
|
||||||
}
|
|
||||||
if ( hash6[0] & mask )
|
|
||||||
{
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash6,
|
|
||||||
(char*)hash6, 512 );
|
|
||||||
}
|
|
||||||
if ( hash7[0] & mask )
|
|
||||||
{
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash7,
|
|
||||||
(char*)hash7, 512 );
|
|
||||||
}
|
|
||||||
|
|
||||||
intrlv_8x64( vhashC, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
intrlv_8x64( vhashC, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||||
hash7, 512 );
|
hash7, 512 );
|
||||||
@@ -157,10 +119,7 @@ void quark_8way_hash( void *state, const void *input )
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ( vh_mask & 0xff )
|
if ( vh_mask & 0xff )
|
||||||
{
|
skein512_8way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
|
||||||
skein512_8way_close( &ctx.skein, vhashB );
|
|
||||||
}
|
|
||||||
|
|
||||||
mm512_blend_hash_8x64( vh, vhC, vhB, vh_mask );
|
mm512_blend_hash_8x64( vh, vhC, vhB, vh_mask );
|
||||||
|
|
||||||
@@ -168,10 +127,10 @@ void quark_8way_hash( void *state, const void *input )
|
|||||||
|
|
||||||
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
rintrlv_8x64_4x128( vhashA, vhashB, vhash, 512 );
|
||||||
|
|
||||||
groestl512_4way_init( &ctx.groestl, 64 );
|
if ( ( vh_mask & 0x0f ) != 0x0f )
|
||||||
groestl512_4way_update_close( &ctx.groestl, vhashA, vhashA, 512 );
|
groestl512_4way_full( &ctx.groestl, vhashA, vhashA, 64 );
|
||||||
groestl512_4way_init( &ctx.groestl, 64 );
|
if ( ( vh_mask & 0xf0 ) != 0xf0 )
|
||||||
groestl512_4way_update_close( &ctx.groestl, vhashB, vhashB, 512 );
|
groestl512_4way_full( &ctx.groestl, vhashB, vhashB, 64 );
|
||||||
|
|
||||||
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
rintrlv_4x128_8x64( vhash, vhashA, vhashB, 512 );
|
||||||
|
|
||||||
@@ -180,22 +139,22 @@ void quark_8way_hash( void *state, const void *input )
|
|||||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||||
vhash, 512 );
|
vhash, 512 );
|
||||||
|
|
||||||
reinit_groestl( &ctx.groestl );
|
if ( hash0[0] & 8 )
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
if ( hash1[0] & 8 )
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
if ( hash2[0] & 8)
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
if ( hash3[0] & 8 )
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
if ( hash4[0] & 8 )
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash4, (char*)hash4, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
if ( hash5[0] & 8 )
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash5, (char*)hash5, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
if ( hash6[0] & 8 )
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash6, (char*)hash6, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
if ( hash7[0] & 8 )
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash7, (char*)hash7, 512 );
|
||||||
|
|
||||||
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
intrlv_8x64( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
|
||||||
512 );
|
512 );
|
||||||
@@ -209,27 +168,16 @@ void quark_8way_hash( void *state, const void *input )
|
|||||||
zero );
|
zero );
|
||||||
|
|
||||||
if ( ( vh_mask & 0xff ) != 0xff )
|
if ( ( vh_mask & 0xff ) != 0xff )
|
||||||
{
|
blake512_8way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||||
blake512_8way_init( &ctx.blake );
|
|
||||||
blake512_8way_update( &ctx.blake, vhash, 64 );
|
|
||||||
blake512_8way_close( &ctx.blake, vhashA );
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( vh_mask & 0xff )
|
if ( vh_mask & 0xff )
|
||||||
{
|
bmw512_8way_full( &ctx.bmw, vhashB, vhash, 64 );
|
||||||
bmw512_8way_init( &ctx.bmw );
|
|
||||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
|
||||||
bmw512_8way_close( &ctx.bmw, vhashB );
|
|
||||||
}
|
|
||||||
|
|
||||||
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
|
mm512_blend_hash_8x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
keccak512_8way_update( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_8way_close( &ctx.keccak, vhash );
|
keccak512_8way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
skein512_8way_init( &ctx.skein );
|
skein512_8way_full( &ctx.skein, vhash, vhash, 64 );
|
||||||
skein512_8way_update( &ctx.skein, vhash, 64 );
|
|
||||||
skein512_8way_close( &ctx.skein, vhash );
|
|
||||||
|
|
||||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||||
zero );
|
zero );
|
||||||
@@ -258,41 +206,44 @@ void quark_8way_hash( void *state, const void *input )
|
|||||||
int scanhash_quark_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_quark_8way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[8*8] __attribute__ ((aligned (128)));
|
uint64_t hash64[4*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t vdata[24*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &(hash[49]);
|
uint64_t *hash64_q3 = &(hash64[3*8]);
|
||||||
uint32_t *pdata = work->data;
|
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
const uint32_t last_nonce = max_nonce - 8;
|
||||||
int thr_id = mythr->id;
|
__m512i *noncev = (__m512i*)vdata + 9;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||||
|
*noncev = mm512_intrlv_blend_32(
|
||||||
|
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||||
|
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
quark_8way_hash( hash64, vdata );
|
||||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
|
||||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
|
||||||
|
|
||||||
quark_8way_hash( hash, vdata );
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
pdata[19] = n;
|
if ( unlikely( hash64_q3[ lane ] <= targ64_q3 && !bench ) )
|
||||||
|
|
||||||
for ( int i = 0; i < 8; i++ )
|
|
||||||
if ( unlikely( hash7[ i<<1 ] <= Htarg ) )
|
|
||||||
{
|
{
|
||||||
extr_lane_8x64( lane_hash, hash, i, 256 );
|
extr_lane_8x64( lane_hash, hash64, lane, 256 );
|
||||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, i );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
|
m512_const1_64( 0x0000000800000000 ) );
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( ( n < max_nonce-8 ) && !work_restart[thr_id].restart );
|
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||||
|
|
||||||
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce;
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -333,67 +284,47 @@ void quark_4way_hash( void *state, const void *input )
|
|||||||
__m256i* vhA = (__m256i*)vhashA;
|
__m256i* vhA = (__m256i*)vhashA;
|
||||||
__m256i* vhB = (__m256i*)vhashB;
|
__m256i* vhB = (__m256i*)vhashB;
|
||||||
__m256i vh_mask;
|
__m256i vh_mask;
|
||||||
|
int h_mask;
|
||||||
quark_4way_ctx_holder ctx;
|
quark_4way_ctx_holder ctx;
|
||||||
const __m256i bit3_mask = m256_const1_64( 8 );
|
const __m256i bit3_mask = m256_const1_64( 8 );
|
||||||
const uint32_t mask = 8;
|
|
||||||
const __m256i zero = _mm256_setzero_si256();
|
const __m256i zero = _mm256_setzero_si256();
|
||||||
|
|
||||||
memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) );
|
memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) );
|
||||||
|
|
||||||
blake512_4way_update( &ctx.blake, input, 80 );
|
blake512_4way_full( &ctx.blake, vhash, input, 80 );
|
||||||
blake512_4way_close( &ctx.blake, vhash );
|
|
||||||
|
|
||||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||||
bmw512_4way_close( &ctx.bmw, vhash );
|
bmw512_4way_close( &ctx.bmw, vhash );
|
||||||
|
|
||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||||
|
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||||
|
|
||||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
if ( hash0[0] & mask )
|
// A
|
||||||
{
|
if ( hash0[0] & 8 )
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0,
|
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
(char*)hash0, 512 );
|
if ( hash1[0] & 8 )
|
||||||
}
|
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||||
if ( hash1[0] & mask )
|
if ( hash2[0] & 8)
|
||||||
{
|
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
if ( hash3[0] & 8 )
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash1,
|
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
(char*)hash1, 512 );
|
|
||||||
}
|
|
||||||
if ( hash2[0] & mask )
|
|
||||||
{
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash2,
|
|
||||||
(char*)hash2, 512 );
|
|
||||||
}
|
|
||||||
if ( hash3[0] & mask )
|
|
||||||
{
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3,
|
|
||||||
(char*)hash3, 512 );
|
|
||||||
}
|
|
||||||
|
|
||||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
if ( mm256_anybits1( vh_mask ) )
|
// B
|
||||||
{
|
if ( likely( h_mask & 0xffffffff ) )
|
||||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
skein512_4way_full( &ctx.skein, vhashB, vhash, 64 );
|
||||||
skein512_4way_close( &ctx.skein, vhashB );
|
|
||||||
}
|
|
||||||
|
|
||||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||||
|
|
||||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
reinit_groestl( &ctx.groestl );
|
groestl512_full( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
groestl512_full( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
groestl512_full( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 512 );
|
|
||||||
reinit_groestl( &ctx.groestl );
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
|
||||||
|
|
||||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
@@ -401,15 +332,13 @@ void quark_4way_hash( void *state, const void *input )
|
|||||||
jh512_4way_close( &ctx.jh, vhash );
|
jh512_4way_close( &ctx.jh, vhash );
|
||||||
|
|
||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||||
|
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||||
|
|
||||||
if ( mm256_anybits0( vh_mask ) )
|
// A
|
||||||
{
|
if ( likely( ( h_mask & 0xffffffff ) != 0xffffffff ) )
|
||||||
blake512_4way_init( &ctx.blake );
|
blake512_4way_full( &ctx.blake, vhashA, vhash, 64 );
|
||||||
blake512_4way_update( &ctx.blake, vhash, 64 );
|
// B
|
||||||
blake512_4way_close( &ctx.blake, vhashA );
|
if ( likely( h_mask & 0xffffffff ) )
|
||||||
}
|
|
||||||
|
|
||||||
if ( mm256_anybits1( vh_mask ) )
|
|
||||||
{
|
{
|
||||||
bmw512_4way_init( &ctx.bmw );
|
bmw512_4way_init( &ctx.bmw );
|
||||||
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
bmw512_4way_update( &ctx.bmw, vhash, 64 );
|
||||||
@@ -421,20 +350,20 @@ void quark_4way_hash( void *state, const void *input )
|
|||||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhash );
|
keccak512_4way_close( &ctx.keccak, vhash );
|
||||||
|
|
||||||
skein512_4way_init( &ctx.skein );
|
skein512_4way_full( &ctx.skein, vhash, vhash, 64 );
|
||||||
skein512_4way_update( &ctx.skein, vhash, 64 );
|
|
||||||
skein512_4way_close( &ctx.skein, vhash );
|
|
||||||
|
|
||||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||||
|
h_mask = _mm256_movemask_epi8( vh_mask );
|
||||||
|
|
||||||
if ( mm256_anybits0( vh_mask ) )
|
// A
|
||||||
|
if ( likely( ( h_mask & 0xffffffff ) != 0xffffffff ) )
|
||||||
{
|
{
|
||||||
keccak512_4way_init( &ctx.keccak );
|
keccak512_4way_init( &ctx.keccak );
|
||||||
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
keccak512_4way_update( &ctx.keccak, vhash, 64 );
|
||||||
keccak512_4way_close( &ctx.keccak, vhashA );
|
keccak512_4way_close( &ctx.keccak, vhashA );
|
||||||
}
|
}
|
||||||
|
// B
|
||||||
if ( mm256_anybits1( vh_mask ) )
|
if ( likely( h_mask & 0xffffffff ) )
|
||||||
{
|
{
|
||||||
jh512_4way_init( &ctx.jh );
|
jh512_4way_init( &ctx.jh );
|
||||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||||
@@ -451,41 +380,44 @@ void quark_4way_hash( void *state, const void *input )
|
|||||||
int scanhash_quark_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_quark_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
uint64_t hash64[4*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &(hash[25]);
|
uint64_t *hash64_q3 = &(hash64[3*4]);
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
|
const uint64_t targ64_q3 = ((uint64_t*)ptarget)[3];
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
const uint32_t last_nonce = max_nonce - 4;
|
||||||
int thr_id = mythr->id;
|
__m256i *noncev = (__m256i*)vdata + 9;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||||
|
*noncev = mm256_intrlv_blend_32(
|
||||||
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
quark_4way_hash( hash64, vdata );
|
||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
|
||||||
|
|
||||||
quark_4way_hash( hash, vdata );
|
for ( int lane = 0; lane < 4; lane++ )
|
||||||
pdata[19] = n;
|
if ( hash64_q3[ lane ] <= targ64_q3 && !bench )
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
|
||||||
if ( unlikely( hash7[ i<<1 ] <= Htarg ) )
|
|
||||||
{
|
{
|
||||||
extr_lane_4x64( lane_hash, hash, i, 256 );
|
extr_lane_4x64( lane_hash, hash64, lane, 256 );
|
||||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, i );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
|
m256_const1_64( 0x0000000400000000 ) );
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( likely( ( n < last_nonce ) && !work_restart[thr_id].restart ) );
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -106,13 +106,13 @@ int scanhash_deep_2way( struct work *work,uint32_t max_nonce,
|
|||||||
if ( fulltest( hash, ptarget) && !opt_benchmark )
|
if ( fulltest( hash, ptarget) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
submit_lane_solution( work, hash, mythr, 0 );
|
submit_solution( work, hash, mythr );
|
||||||
}
|
}
|
||||||
if ( !( (hash+8)[7] & mask ) )
|
if ( !( (hash+8)[7] & mask ) )
|
||||||
if ( fulltest( hash+8, ptarget) && !opt_benchmark )
|
if ( fulltest( hash+8, ptarget) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+1;
|
pdata[19] = n+1;
|
||||||
submit_lane_solution( work, hash+8, mythr, 1 );
|
submit_solution( work, hash+8, mythr );
|
||||||
}
|
}
|
||||||
n += 2;
|
n += 2;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -153,7 +153,7 @@ int scanhash_qubit_4way( struct work *work,uint32_t max_nonce,
|
|||||||
if ( likely( fulltest( hash+(lane<<3), ptarget) && !opt_benchmark ) )
|
if ( likely( fulltest( hash+(lane<<3), ptarget) && !opt_benchmark ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
submit_solution( work, hash+(lane<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart );
|
||||||
@@ -255,13 +255,13 @@ int scanhash_qubit_2way( struct work *work,uint32_t max_nonce,
|
|||||||
if ( likely( fulltest( hash, ptarget) && !opt_benchmark ) )
|
if ( likely( fulltest( hash, ptarget) && !opt_benchmark ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
submit_lane_solution( work, hash, mythr, 0 );
|
submit_solution( work, hash, mythr );
|
||||||
}
|
}
|
||||||
if ( unlikely( ( (hash+8))[7] <= Htarg ) )
|
if ( unlikely( ( (hash+8))[7] <= Htarg ) )
|
||||||
if ( likely( fulltest( hash+8, ptarget) && !opt_benchmark ) )
|
if ( likely( fulltest( hash+8, ptarget) && !opt_benchmark ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n+1;
|
pdata[19] = n+1;
|
||||||
submit_lane_solution( work, hash+8, mythr, 1 );
|
submit_solution( work, hash+8, mythr );
|
||||||
}
|
}
|
||||||
n += 2;
|
n += 2;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -132,7 +132,7 @@ int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
if ( likely( fulltest( lane_hash, ptarget ) && !opt_benchmark ) )
|
||||||
{
|
{
|
||||||
pdata[27] = n + i;
|
pdata[27] = n + i;
|
||||||
submit_lane_solution( work, lane_hash, mythr, i );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 16;
|
n += 16;
|
||||||
@@ -251,7 +251,7 @@ int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[27] = n + i;
|
pdata[27] = n + i;
|
||||||
submit_lane_solution( work, lane_hash, mythr, i );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
|
@@ -380,7 +380,7 @@ static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
|
|||||||
#endif /* HAVE_SHA256_8WAY */
|
#endif /* HAVE_SHA256_8WAY */
|
||||||
|
|
||||||
|
|
||||||
#if defined(USE_ASM) && defined(__x86_64__)
|
//#if defined(USE_ASM) && defined(__x86_64__)
|
||||||
|
|
||||||
#define SCRYPT_MAX_WAYS 12
|
#define SCRYPT_MAX_WAYS 12
|
||||||
#define HAVE_SCRYPT_3WAY 1
|
#define HAVE_SCRYPT_3WAY 1
|
||||||
@@ -394,113 +394,6 @@ void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
|
|||||||
void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
|
void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#elif defined(USE_ASM) && defined(__i386__)
|
|
||||||
|
|
||||||
#define SCRYPT_MAX_WAYS 4
|
|
||||||
#define scrypt_best_throughput() 1
|
|
||||||
void scrypt_core(uint32_t *X, uint32_t *V, int N);
|
|
||||||
|
|
||||||
#elif defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
|
|
||||||
|
|
||||||
void scrypt_core(uint32_t *X, uint32_t *V, int N);
|
|
||||||
#if defined(__ARM_NEON__)
|
|
||||||
#undef HAVE_SHA256_4WAY
|
|
||||||
#define SCRYPT_MAX_WAYS 3
|
|
||||||
#define HAVE_SCRYPT_3WAY 1
|
|
||||||
#define scrypt_best_throughput() 3
|
|
||||||
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
|
|
||||||
{
|
|
||||||
uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
x00 = (B[ 0] ^= Bx[ 0]);
|
|
||||||
x01 = (B[ 1] ^= Bx[ 1]);
|
|
||||||
x02 = (B[ 2] ^= Bx[ 2]);
|
|
||||||
x03 = (B[ 3] ^= Bx[ 3]);
|
|
||||||
x04 = (B[ 4] ^= Bx[ 4]);
|
|
||||||
x05 = (B[ 5] ^= Bx[ 5]);
|
|
||||||
x06 = (B[ 6] ^= Bx[ 6]);
|
|
||||||
x07 = (B[ 7] ^= Bx[ 7]);
|
|
||||||
x08 = (B[ 8] ^= Bx[ 8]);
|
|
||||||
x09 = (B[ 9] ^= Bx[ 9]);
|
|
||||||
x10 = (B[10] ^= Bx[10]);
|
|
||||||
x11 = (B[11] ^= Bx[11]);
|
|
||||||
x12 = (B[12] ^= Bx[12]);
|
|
||||||
x13 = (B[13] ^= Bx[13]);
|
|
||||||
x14 = (B[14] ^= Bx[14]);
|
|
||||||
x15 = (B[15] ^= Bx[15]);
|
|
||||||
for (i = 0; i < 8; i += 2) {
|
|
||||||
#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
|
|
||||||
/* Operate on columns. */
|
|
||||||
x04 ^= R(x00+x12, 7); x09 ^= R(x05+x01, 7);
|
|
||||||
x14 ^= R(x10+x06, 7); x03 ^= R(x15+x11, 7);
|
|
||||||
|
|
||||||
x08 ^= R(x04+x00, 9); x13 ^= R(x09+x05, 9);
|
|
||||||
x02 ^= R(x14+x10, 9); x07 ^= R(x03+x15, 9);
|
|
||||||
|
|
||||||
x12 ^= R(x08+x04,13); x01 ^= R(x13+x09,13);
|
|
||||||
x06 ^= R(x02+x14,13); x11 ^= R(x07+x03,13);
|
|
||||||
|
|
||||||
x00 ^= R(x12+x08,18); x05 ^= R(x01+x13,18);
|
|
||||||
x10 ^= R(x06+x02,18); x15 ^= R(x11+x07,18);
|
|
||||||
|
|
||||||
/* Operate on rows. */
|
|
||||||
x01 ^= R(x00+x03, 7); x06 ^= R(x05+x04, 7);
|
|
||||||
x11 ^= R(x10+x09, 7); x12 ^= R(x15+x14, 7);
|
|
||||||
|
|
||||||
x02 ^= R(x01+x00, 9); x07 ^= R(x06+x05, 9);
|
|
||||||
x08 ^= R(x11+x10, 9); x13 ^= R(x12+x15, 9);
|
|
||||||
|
|
||||||
x03 ^= R(x02+x01,13); x04 ^= R(x07+x06,13);
|
|
||||||
x09 ^= R(x08+x11,13); x14 ^= R(x13+x12,13);
|
|
||||||
|
|
||||||
x00 ^= R(x03+x02,18); x05 ^= R(x04+x07,18);
|
|
||||||
x10 ^= R(x09+x08,18); x15 ^= R(x14+x13,18);
|
|
||||||
#undef R
|
|
||||||
}
|
|
||||||
B[ 0] += x00;
|
|
||||||
B[ 1] += x01;
|
|
||||||
B[ 2] += x02;
|
|
||||||
B[ 3] += x03;
|
|
||||||
B[ 4] += x04;
|
|
||||||
B[ 5] += x05;
|
|
||||||
B[ 6] += x06;
|
|
||||||
B[ 7] += x07;
|
|
||||||
B[ 8] += x08;
|
|
||||||
B[ 9] += x09;
|
|
||||||
B[10] += x10;
|
|
||||||
B[11] += x11;
|
|
||||||
B[12] += x12;
|
|
||||||
B[13] += x13;
|
|
||||||
B[14] += x14;
|
|
||||||
B[15] += x15;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void scrypt_core(uint32_t *X, uint32_t *V, int N)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < N; i++) {
|
|
||||||
memcpy(&V[i * 32], X, 128);
|
|
||||||
xor_salsa8(&X[0], &X[16]);
|
|
||||||
xor_salsa8(&X[16], &X[0]);
|
|
||||||
}
|
|
||||||
for (i = 0; i < N; i++) {
|
|
||||||
uint32_t j = 32 * (X[16] & (N - 1));
|
|
||||||
for (uint8_t k = 0; k < 32; k++)
|
|
||||||
X[k] ^= V[j + k];
|
|
||||||
xor_salsa8(&X[0], &X[16]);
|
|
||||||
xor_salsa8(&X[16], &X[0]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef SCRYPT_MAX_WAYS
|
#ifndef SCRYPT_MAX_WAYS
|
||||||
#define SCRYPT_MAX_WAYS 1
|
#define SCRYPT_MAX_WAYS 1
|
||||||
#define scrypt_best_throughput() 1
|
#define scrypt_best_throughput() 1
|
||||||
@@ -511,8 +404,8 @@ unsigned char *scrypt_buffer_alloc(int N)
|
|||||||
return (uchar*) malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63);
|
return (uchar*) malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
|
static bool scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N)
|
uint32_t *midstate, unsigned char *scratchpad, int N, int thr_id )
|
||||||
{
|
{
|
||||||
uint32_t tstate[8], ostate[8];
|
uint32_t tstate[8], ostate[8];
|
||||||
uint32_t X[32];
|
uint32_t X[32];
|
||||||
@@ -527,11 +420,13 @@ static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
|
|||||||
scrypt_core(X, V, N);
|
scrypt_core(X, V, N);
|
||||||
|
|
||||||
PBKDF2_SHA256_128_32(tstate, ostate, X, output);
|
PBKDF2_SHA256_128_32(tstate, ostate, X, output);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_4WAY
|
#ifdef HAVE_SHA256_4WAY
|
||||||
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
|
static int scrypt_1024_1_1_256_4way(const uint32_t *input,
|
||||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
|
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
|
||||||
|
int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[4 * 8];
|
uint32_t _ALIGN(128) tstate[4 * 8];
|
||||||
uint32_t _ALIGN(128) ostate[4 * 8];
|
uint32_t _ALIGN(128) ostate[4 * 8];
|
||||||
@@ -545,32 +440,47 @@ static void scrypt_1024_1_1_256_4way(const uint32_t *input,
|
|||||||
for (i = 0; i < 20; i++)
|
for (i = 0; i < 20; i++)
|
||||||
for (k = 0; k < 4; k++)
|
for (k = 0; k < 4; k++)
|
||||||
W[4 * i + k] = input[k * 20 + i];
|
W[4 * i + k] = input[k * 20 + i];
|
||||||
|
|
||||||
for (i = 0; i < 8; i++)
|
for (i = 0; i < 8; i++)
|
||||||
for (k = 0; k < 4; k++)
|
for (k = 0; k < 4; k++)
|
||||||
tstate[4 * i + k] = midstate[i];
|
tstate[4 * i + k] = midstate[i];
|
||||||
|
|
||||||
HMAC_SHA256_80_init_4way(W, tstate, ostate);
|
HMAC_SHA256_80_init_4way(W, tstate, ostate);
|
||||||
|
|
||||||
PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);
|
PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
for (i = 0; i < 32; i++)
|
for (i = 0; i < 32; i++)
|
||||||
for (k = 0; k < 4; k++)
|
for (k = 0; k < 4; k++)
|
||||||
X[k * 32 + i] = W[4 * i + k];
|
X[k * 32 + i] = W[4 * i + k];
|
||||||
|
|
||||||
scrypt_core(X + 0 * 32, V, N);
|
scrypt_core(X + 0 * 32, V, N);
|
||||||
scrypt_core(X + 1 * 32, V, N);
|
scrypt_core(X + 1 * 32, V, N);
|
||||||
scrypt_core(X + 2 * 32, V, N);
|
scrypt_core(X + 2 * 32, V, N);
|
||||||
scrypt_core(X + 3 * 32, V, N);
|
scrypt_core(X + 3 * 32, V, N);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
for (i = 0; i < 32; i++)
|
for (i = 0; i < 32; i++)
|
||||||
for (k = 0; k < 4; k++)
|
for (k = 0; k < 4; k++)
|
||||||
W[4 * i + k] = X[k * 32 + i];
|
W[4 * i + k] = X[k * 32 + i];
|
||||||
|
|
||||||
PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);
|
PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);
|
||||||
|
|
||||||
for (i = 0; i < 8; i++)
|
for (i = 0; i < 8; i++)
|
||||||
for (k = 0; k < 4; k++)
|
for (k = 0; k < 4; k++)
|
||||||
output[k * 8 + i] = W[4 * i + k];
|
output[k * 8 + i] = W[4 * i + k];
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_SHA256_4WAY */
|
#endif /* HAVE_SHA256_4WAY */
|
||||||
|
|
||||||
#ifdef HAVE_SCRYPT_3WAY
|
#ifdef HAVE_SCRYPT_3WAY
|
||||||
|
|
||||||
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
|
static int scrypt_1024_1_1_256_3way(const uint32_t *input,
|
||||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
|
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
|
||||||
|
int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(64) tstate[3 * 8], ostate[3 * 8];
|
uint32_t _ALIGN(64) tstate[3 * 8], ostate[3 * 8];
|
||||||
uint32_t _ALIGN(64) X[3 * 32];
|
uint32_t _ALIGN(64) X[3 * 32];
|
||||||
@@ -581,23 +491,34 @@ static void scrypt_1024_1_1_256_3way(const uint32_t *input,
|
|||||||
memcpy(tstate + 0, midstate, 32);
|
memcpy(tstate + 0, midstate, 32);
|
||||||
memcpy(tstate + 8, midstate, 32);
|
memcpy(tstate + 8, midstate, 32);
|
||||||
memcpy(tstate + 16, midstate, 32);
|
memcpy(tstate + 16, midstate, 32);
|
||||||
|
|
||||||
HMAC_SHA256_80_init(input + 0, tstate + 0, ostate + 0);
|
HMAC_SHA256_80_init(input + 0, tstate + 0, ostate + 0);
|
||||||
HMAC_SHA256_80_init(input + 20, tstate + 8, ostate + 8);
|
HMAC_SHA256_80_init(input + 20, tstate + 8, ostate + 8);
|
||||||
HMAC_SHA256_80_init(input + 40, tstate + 16, ostate + 16);
|
HMAC_SHA256_80_init(input + 40, tstate + 16, ostate + 16);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
PBKDF2_SHA256_80_128(tstate + 0, ostate + 0, input + 0, X + 0);
|
PBKDF2_SHA256_80_128(tstate + 0, ostate + 0, input + 0, X + 0);
|
||||||
PBKDF2_SHA256_80_128(tstate + 8, ostate + 8, input + 20, X + 32);
|
PBKDF2_SHA256_80_128(tstate + 8, ostate + 8, input + 20, X + 32);
|
||||||
PBKDF2_SHA256_80_128(tstate + 16, ostate + 16, input + 40, X + 64);
|
PBKDF2_SHA256_80_128(tstate + 16, ostate + 16, input + 40, X + 64);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
scrypt_core_3way(X, V, N);
|
scrypt_core_3way(X, V, N);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
|
PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
|
||||||
PBKDF2_SHA256_128_32(tstate + 8, ostate + 8, X + 32, output + 8);
|
PBKDF2_SHA256_128_32(tstate + 8, ostate + 8, X + 32, output + 8);
|
||||||
PBKDF2_SHA256_128_32(tstate + 16, ostate + 16, X + 64, output + 16);
|
PBKDF2_SHA256_128_32(tstate + 16, ostate + 16, X + 64, output + 16);
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_4WAY
|
#ifdef HAVE_SHA256_4WAY
|
||||||
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
|
static bool scrypt_1024_1_1_256_12way(const uint32_t *input,
|
||||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
|
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
|
||||||
|
int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[12 * 8];
|
uint32_t _ALIGN(128) tstate[12 * 8];
|
||||||
uint32_t _ALIGN(128) ostate[12 * 8];
|
uint32_t _ALIGN(128) ostate[12 * 8];
|
||||||
@@ -612,43 +533,60 @@ static void scrypt_1024_1_1_256_12way(const uint32_t *input,
|
|||||||
for (i = 0; i < 20; i++)
|
for (i = 0; i < 20; i++)
|
||||||
for (k = 0; k < 4; k++)
|
for (k = 0; k < 4; k++)
|
||||||
W[128 * j + 4 * i + k] = input[80 * j + k * 20 + i];
|
W[128 * j + 4 * i + k] = input[80 * j + k * 20 + i];
|
||||||
|
|
||||||
for (j = 0; j < 3; j++)
|
for (j = 0; j < 3; j++)
|
||||||
for (i = 0; i < 8; i++)
|
for (i = 0; i < 8; i++)
|
||||||
for (k = 0; k < 4; k++)
|
for (k = 0; k < 4; k++)
|
||||||
tstate[32 * j + 4 * i + k] = midstate[i];
|
tstate[32 * j + 4 * i + k] = midstate[i];
|
||||||
|
|
||||||
HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
|
HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
|
||||||
HMAC_SHA256_80_init_4way(W + 128, tstate + 32, ostate + 32);
|
HMAC_SHA256_80_init_4way(W + 128, tstate + 32, ostate + 32);
|
||||||
HMAC_SHA256_80_init_4way(W + 256, tstate + 64, ostate + 64);
|
HMAC_SHA256_80_init_4way(W + 256, tstate + 64, ostate + 64);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
|
PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||||
PBKDF2_SHA256_80_128_4way(tstate + 32, ostate + 32, W + 128, W + 128);
|
PBKDF2_SHA256_80_128_4way(tstate + 32, ostate + 32, W + 128, W + 128);
|
||||||
PBKDF2_SHA256_80_128_4way(tstate + 64, ostate + 64, W + 256, W + 256);
|
PBKDF2_SHA256_80_128_4way(tstate + 64, ostate + 64, W + 256, W + 256);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
for (j = 0; j < 3; j++)
|
for (j = 0; j < 3; j++)
|
||||||
for (i = 0; i < 32; i++)
|
for (i = 0; i < 32; i++)
|
||||||
for (k = 0; k < 4; k++)
|
for (k = 0; k < 4; k++)
|
||||||
X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
|
X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
|
||||||
|
|
||||||
scrypt_core_3way(X + 0 * 96, V, N);
|
scrypt_core_3way(X + 0 * 96, V, N);
|
||||||
scrypt_core_3way(X + 1 * 96, V, N);
|
scrypt_core_3way(X + 1 * 96, V, N);
|
||||||
scrypt_core_3way(X + 2 * 96, V, N);
|
scrypt_core_3way(X + 2 * 96, V, N);
|
||||||
scrypt_core_3way(X + 3 * 96, V, N);
|
scrypt_core_3way(X + 3 * 96, V, N);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
for (j = 0; j < 3; j++)
|
for (j = 0; j < 3; j++)
|
||||||
for (i = 0; i < 32; i++)
|
for (i = 0; i < 32; i++)
|
||||||
for (k = 0; k < 4; k++)
|
for (k = 0; k < 4; k++)
|
||||||
W[128 * j + 4 * i + k] = X[128 * j + k * 32 + i];
|
W[128 * j + 4 * i + k] = X[128 * j + k * 32 + i];
|
||||||
|
|
||||||
PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
|
PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||||
PBKDF2_SHA256_128_32_4way(tstate + 32, ostate + 32, W + 128, W + 128);
|
PBKDF2_SHA256_128_32_4way(tstate + 32, ostate + 32, W + 128, W + 128);
|
||||||
PBKDF2_SHA256_128_32_4way(tstate + 64, ostate + 64, W + 256, W + 256);
|
PBKDF2_SHA256_128_32_4way(tstate + 64, ostate + 64, W + 256, W + 256);
|
||||||
|
|
||||||
for (j = 0; j < 3; j++)
|
for (j = 0; j < 3; j++)
|
||||||
for (i = 0; i < 8; i++)
|
for (i = 0; i < 8; i++)
|
||||||
for (k = 0; k < 4; k++)
|
for (k = 0; k < 4; k++)
|
||||||
output[32 * j + k * 8 + i] = W[128 * j + 4 * i + k];
|
output[32 * j + k * 8 + i] = W[128 * j + 4 * i + k];
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_SHA256_4WAY */
|
#endif /* HAVE_SHA256_4WAY */
|
||||||
|
|
||||||
#endif /* HAVE_SCRYPT_3WAY */
|
#endif /* HAVE_SCRYPT_3WAY */
|
||||||
|
|
||||||
#ifdef HAVE_SCRYPT_6WAY
|
#ifdef HAVE_SCRYPT_6WAY
|
||||||
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
|
static int scrypt_1024_1_1_256_24way( const uint32_t *input,
|
||||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
|
uint32_t *output, uint32_t *midstate,
|
||||||
|
unsigned char *scratchpad, int N, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) tstate[24 * 8];
|
uint32_t _ALIGN(128) tstate[24 * 8];
|
||||||
uint32_t _ALIGN(128) ostate[24 * 8];
|
uint32_t _ALIGN(128) ostate[24 * 8];
|
||||||
@@ -663,35 +601,54 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
|
|||||||
for ( i = 0; i < 20; i++ )
|
for ( i = 0; i < 20; i++ )
|
||||||
for ( k = 0; k < 8; k++ )
|
for ( k = 0; k < 8; k++ )
|
||||||
W[8 * 32 * j + 8 * i + k] = input[8 * 20 * j + k * 20 + i];
|
W[8 * 32 * j + 8 * i + k] = input[8 * 20 * j + k * 20 + i];
|
||||||
|
|
||||||
for ( j = 0; j < 3; j++ )
|
for ( j = 0; j < 3; j++ )
|
||||||
for ( i = 0; i < 8; i++ )
|
for ( i = 0; i < 8; i++ )
|
||||||
for ( k = 0; k < 8; k++ )
|
for ( k = 0; k < 8; k++ )
|
||||||
tstate[8 * 8 * j + 8 * i + k] = midstate[i];
|
tstate[8 * 8 * j + 8 * i + k] = midstate[i];
|
||||||
|
|
||||||
HMAC_SHA256_80_init_8way( W + 0, tstate + 0, ostate + 0 );
|
HMAC_SHA256_80_init_8way( W + 0, tstate + 0, ostate + 0 );
|
||||||
HMAC_SHA256_80_init_8way( W + 256, tstate + 64, ostate + 64 );
|
HMAC_SHA256_80_init_8way( W + 256, tstate + 64, ostate + 64 );
|
||||||
HMAC_SHA256_80_init_8way( W + 512, tstate + 128, ostate + 128 );
|
HMAC_SHA256_80_init_8way( W + 512, tstate + 128, ostate + 128 );
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
PBKDF2_SHA256_80_128_8way( tstate + 0, ostate + 0, W + 0, W + 0 );
|
PBKDF2_SHA256_80_128_8way( tstate + 0, ostate + 0, W + 0, W + 0 );
|
||||||
PBKDF2_SHA256_80_128_8way( tstate + 64, ostate + 64, W + 256, W + 256 );
|
PBKDF2_SHA256_80_128_8way( tstate + 64, ostate + 64, W + 256, W + 256 );
|
||||||
PBKDF2_SHA256_80_128_8way( tstate + 128, ostate + 128, W + 512, W + 512 );
|
PBKDF2_SHA256_80_128_8way( tstate + 128, ostate + 128, W + 512, W + 512 );
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
for ( j = 0; j < 3; j++ )
|
for ( j = 0; j < 3; j++ )
|
||||||
for ( i = 0; i < 32; i++ )
|
for ( i = 0; i < 32; i++ )
|
||||||
for ( k = 0; k < 8; k++ )
|
for ( k = 0; k < 8; k++ )
|
||||||
X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];
|
X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];
|
||||||
|
|
||||||
scrypt_core_6way( X + 0 * 32, V, N );
|
scrypt_core_6way( X + 0 * 32, V, N );
|
||||||
scrypt_core_6way( X + 6 * 32, V, N );
|
scrypt_core_6way( X + 6 * 32, V, N );
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
scrypt_core_6way( X + 12 * 32, V, N );
|
scrypt_core_6way( X + 12 * 32, V, N );
|
||||||
scrypt_core_6way( X + 18 * 32, V, N );
|
scrypt_core_6way( X + 18 * 32, V, N );
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
for ( j = 0; j < 3; j++ )
|
for ( j = 0; j < 3; j++ )
|
||||||
for ( i = 0; i < 32; i++ )
|
for ( i = 0; i < 32; i++ )
|
||||||
for ( k = 0; k < 8; k++ )
|
for ( k = 0; k < 8; k++ )
|
||||||
W[8 * 32 * j + 8 * i + k] = X[8 * 32 * j + k * 32 + i];
|
W[8 * 32 * j + 8 * i + k] = X[8 * 32 * j + k * 32 + i];
|
||||||
|
|
||||||
PBKDF2_SHA256_128_32_8way( tstate + 0, ostate + 0, W + 0, W + 0 );
|
PBKDF2_SHA256_128_32_8way( tstate + 0, ostate + 0, W + 0, W + 0 );
|
||||||
PBKDF2_SHA256_128_32_8way( tstate + 64, ostate + 64, W + 256, W + 256 );
|
PBKDF2_SHA256_128_32_8way( tstate + 64, ostate + 64, W + 256, W + 256 );
|
||||||
PBKDF2_SHA256_128_32_8way( tstate + 128, ostate + 128, W + 512, W + 512 );
|
PBKDF2_SHA256_128_32_8way( tstate + 128, ostate + 128, W + 512, W + 512 );
|
||||||
|
|
||||||
for ( j = 0; j < 3; j++ )
|
for ( j = 0; j < 3; j++ )
|
||||||
for ( i = 0; i < 8; i++ )
|
for ( i = 0; i < 8; i++ )
|
||||||
for ( k = 0; k < 8; k++ )
|
for ( k = 0; k < 8; k++ )
|
||||||
output[8 * 8 * j + k * 8 + i] = W[8 * 32 * j + 8 * i + k];
|
output[8 * 8 * j + k * 8 + i] = W[8 * 32 * j + 8 * i + k];
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_SCRYPT_6WAY */
|
#endif /* HAVE_SCRYPT_6WAY */
|
||||||
|
|
||||||
@@ -703,16 +660,18 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
|
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
|
||||||
uint32_t midstate[8];
|
uint32_t midstate[8];
|
||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
int throughput = scrypt_best_throughput();
|
int throughput = scrypt_best_throughput();
|
||||||
int i;
|
int i;
|
||||||
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_4WAY
|
#ifdef HAVE_SHA256_4WAY
|
||||||
if (sha256_use_4way())
|
if (sha256_use_4way())
|
||||||
throughput *= 4;
|
throughput *= 4;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// applog(LOG_INFO,"Scrypt thoughput %d",throughput);
|
||||||
|
|
||||||
for (i = 0; i < throughput; i++)
|
for (i = 0; i < throughput; i++)
|
||||||
memcpy(data + i * 20, pdata, 80);
|
memcpy(data + i * 20, pdata, 80);
|
||||||
|
|
||||||
@@ -720,46 +679,50 @@ extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
|||||||
sha256_transform(midstate, data, 0);
|
sha256_transform(midstate, data, 0);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
bool rc = true;
|
||||||
for (i = 0; i < throughput; i++)
|
for (i = 0; i < throughput; i++)
|
||||||
data[i * 20 + 19] = ++n;
|
data[i * 20 + 19] = ++n;
|
||||||
|
|
||||||
#if defined(HAVE_SHA256_4WAY)
|
#if defined(HAVE_SHA256_4WAY)
|
||||||
if (throughput == 4)
|
if (throughput == 4)
|
||||||
scrypt_1024_1_1_256_4way(data, hash, midstate,
|
rc = scrypt_1024_1_1_256_4way(data, hash, midstate,
|
||||||
scratchbuf, scratchbuf_size );
|
scratchbuf, scratchbuf_size, thr_id );
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
|
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
|
||||||
if (throughput == 12)
|
if (throughput == 12)
|
||||||
scrypt_1024_1_1_256_12way(data, hash, midstate,
|
rc = scrypt_1024_1_1_256_12way(data, hash, midstate,
|
||||||
scratchbuf, scratchbuf_size );
|
scratchbuf, scratchbuf_size, thr_id );
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAVE_SCRYPT_6WAY)
|
#if defined(HAVE_SCRYPT_6WAY)
|
||||||
if (throughput == 24)
|
if (throughput == 24)
|
||||||
scrypt_1024_1_1_256_24way(data, hash, midstate,
|
rc = scrypt_1024_1_1_256_24way(data, hash, midstate,
|
||||||
scratchbuf, scratchbuf_size );
|
scratchbuf, scratchbuf_size, thr_id );
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAVE_SCRYPT_3WAY)
|
#if defined(HAVE_SCRYPT_3WAY)
|
||||||
if (throughput == 3)
|
if (throughput == 3)
|
||||||
scrypt_1024_1_1_256_3way(data, hash, midstate,
|
rc = scrypt_1024_1_1_256_3way(data, hash, midstate,
|
||||||
scratchbuf, scratchbuf_size );
|
scratchbuf, scratchbuf_size, thr_id );
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
scrypt_1024_1_1_256(data, hash, midstate, scratchbuf,
|
rc = scrypt_1024_1_1_256(data, hash, midstate, scratchbuf,
|
||||||
scratchbuf_size );
|
scratchbuf_size, thr_id );
|
||||||
|
|
||||||
for (i = 0; i < throughput; i++) {
|
if ( rc )
|
||||||
if (unlikely(hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget))) {
|
for ( i = 0; i < throughput; i++ )
|
||||||
|
{
|
||||||
|
if ( unlikely( valid_hash( hash + i * 8, ptarget ) ) )
|
||||||
|
{
|
||||||
pdata[19] = data[i * 20 + 19];
|
pdata[19] = data[i * 20 + 19];
|
||||||
submit_solution( work, hash, mythr );
|
submit_solution( work, hash + i * 8, mythr );
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} while (likely(n < max_nonce && !work_restart[thr_id].restart));
|
|
||||||
|
|
||||||
*hashes_done = n - pdata[19] + 1;
|
}
|
||||||
|
} while ( likely( ( n < ( max_nonce - throughput ) ) && !(*restart) ) );
|
||||||
|
|
||||||
|
*hashes_done = n - pdata[19];
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -778,7 +741,6 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
|||||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||||
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
|
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
|
||||||
gate->scanhash = (void*)&scanhash_scrypt;
|
gate->scanhash = (void*)&scanhash_scrypt;
|
||||||
// gate->hash = (void*)&scrypt_1024_1_1_256_24way;
|
|
||||||
opt_target_factor = 65536.0;
|
opt_target_factor = 65536.0;
|
||||||
|
|
||||||
if ( !opt_param_n )
|
if ( !opt_param_n )
|
||||||
|
440
algo/sha/hmac-sha256-hash-4way.c
Normal file
440
algo/sha/hmac-sha256-hash-4way.c
Normal file
@@ -0,0 +1,440 @@
|
|||||||
|
/*-
|
||||||
|
* Copyright 2005,2007,2009 Colin Percival
|
||||||
|
* Copyright 2020 JayDDee246@gmail.com
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "hmac-sha256-hash-4way.h"
|
||||||
|
#include "compat.h"
|
||||||
|
|
||||||
|
// HMAC 4-way SSE2
|
||||||
|
|
||||||
|
/**
|
||||||
|
* HMAC_SHA256_Buf(K, Klen, in, len, digest):
|
||||||
|
* Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
|
||||||
|
* length ${Klen}, and write the result to ${digest}.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
hmac_sha256_4way_full( void *digest, const void *K, size_t Klen,
|
||||||
|
const void *in, size_t len )
|
||||||
|
{
|
||||||
|
hmac_sha256_4way_context ctx;
|
||||||
|
hmac_sha256_4way_init( &ctx, K, Klen );
|
||||||
|
hmac_sha256_4way_update( &ctx, in, len );
|
||||||
|
hmac_sha256_4way_close( &ctx, digest );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize an HMAC-SHA256 operation with the given key. */
|
||||||
|
void
|
||||||
|
hmac_sha256_4way_init( hmac_sha256_4way_context *ctx, const void *_K,
|
||||||
|
size_t Klen )
|
||||||
|
{
|
||||||
|
unsigned char pad[64*4] __attribute__ ((aligned (64)));
|
||||||
|
unsigned char khash[32*4] __attribute__ ((aligned (64)));
|
||||||
|
const unsigned char * K = _K;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
/* If Klen > 64, the key is really SHA256(K). */
|
||||||
|
if ( Klen > 64 )
|
||||||
|
{
|
||||||
|
sha256_4way_init( &ctx->ictx );
|
||||||
|
sha256_4way_update( &ctx->ictx, K, Klen );
|
||||||
|
sha256_4way_close( &ctx->ictx, khash );
|
||||||
|
K = khash;
|
||||||
|
Klen = 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||||
|
sha256_4way_init( &ctx->ictx );
|
||||||
|
memset( pad, 0x36, 64*4 );
|
||||||
|
|
||||||
|
for ( i = 0; i < Klen; i++ )
|
||||||
|
casti_m128i( pad, i ) = _mm_xor_si128( casti_m128i( pad, i ),
|
||||||
|
casti_m128i( K, i ) );
|
||||||
|
|
||||||
|
sha256_4way_update( &ctx->ictx, pad, 64 );
|
||||||
|
|
||||||
|
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||||
|
sha256_4way_init( &ctx->octx );
|
||||||
|
memset( pad, 0x5c, 64*4 );
|
||||||
|
for ( i = 0; i < Klen/4; i++ )
|
||||||
|
casti_m128i( pad, i ) = _mm_xor_si128( casti_m128i( pad, i ),
|
||||||
|
casti_m128i( K, i ) );
|
||||||
|
sha256_4way_update( &ctx->octx, pad, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add bytes to the HMAC-SHA256 operation. */
|
||||||
|
void
|
||||||
|
hmac_sha256_4way_update( hmac_sha256_4way_context *ctx, const void *in,
|
||||||
|
size_t len )
|
||||||
|
{
|
||||||
|
/* Feed data to the inner SHA256 operation. */
|
||||||
|
sha256_4way_update( &ctx->ictx, in, len );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finish an HMAC-SHA256 operation. */
|
||||||
|
void
|
||||||
|
hmac_sha256_4way_close( hmac_sha256_4way_context *ctx, void *digest )
|
||||||
|
{
|
||||||
|
unsigned char ihash[32*4] __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
/* Finish the inner SHA256 operation. */
|
||||||
|
sha256_4way_close( &ctx->ictx, ihash );
|
||||||
|
|
||||||
|
/* Feed the inner hash to the outer SHA256 operation. */
|
||||||
|
sha256_4way_update( &ctx->octx, ihash, 32 );
|
||||||
|
|
||||||
|
/* Finish the outer SHA256 operation. */
|
||||||
|
sha256_4way_close( &ctx->octx, digest );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||||
|
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||||
|
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
pbkdf2_sha256_4way( uint8_t *buf, size_t dkLen,
|
||||||
|
const uint8_t *passwd, size_t passwdlen,
|
||||||
|
const uint8_t *salt, size_t saltlen, uint64_t c )
|
||||||
|
{
|
||||||
|
hmac_sha256_4way_context PShctx, hctx;
|
||||||
|
uint8_t _ALIGN(128) T[32*4];
|
||||||
|
uint8_t _ALIGN(128) U[32*4];
|
||||||
|
__m128i ivec;
|
||||||
|
size_t i, clen;
|
||||||
|
uint64_t j;
|
||||||
|
int k;
|
||||||
|
|
||||||
|
/* Compute HMAC state after processing P and S. */
|
||||||
|
hmac_sha256_4way_init( &PShctx, passwd, passwdlen );
|
||||||
|
hmac_sha256_4way_update( &PShctx, salt, saltlen );
|
||||||
|
|
||||||
|
/* Iterate through the blocks. */
|
||||||
|
for ( i = 0; i * 32 < dkLen; i++ )
|
||||||
|
{
|
||||||
|
/* Generate INT(i + 1). */
|
||||||
|
ivec = _mm_set1_epi32( bswap_32( i+1 ) );
|
||||||
|
|
||||||
|
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||||
|
memcpy( &hctx, &PShctx, sizeof(hmac_sha256_4way_context) );
|
||||||
|
hmac_sha256_4way_update( &hctx, &ivec, 4 );
|
||||||
|
hmac_sha256_4way_close( &hctx, U );
|
||||||
|
|
||||||
|
/* T_i = U_1 ... */
|
||||||
|
memcpy( T, U, 32*4 );
|
||||||
|
|
||||||
|
for ( j = 2; j <= c; j++ )
|
||||||
|
{
|
||||||
|
/* Compute U_j. */
|
||||||
|
hmac_sha256_4way_init( &hctx, passwd, passwdlen );
|
||||||
|
hmac_sha256_4way_update( &hctx, U, 32 );
|
||||||
|
hmac_sha256_4way_close( &hctx, U );
|
||||||
|
|
||||||
|
/* ... xor U_j ... */
|
||||||
|
for ( k = 0; k < 8; k++ )
|
||||||
|
casti_m128i( T, k ) = _mm_xor_si128( casti_m128i( T, k ),
|
||||||
|
casti_m128i( U, k ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy as many bytes as necessary into buf. */
|
||||||
|
clen = dkLen - i * 32;
|
||||||
|
if ( clen > 32 )
|
||||||
|
clen = 32;
|
||||||
|
memcpy( &buf[ i*32*4 ], T, clen*4 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
|
// HMAC 8-way AVX2
|
||||||
|
|
||||||
|
void
|
||||||
|
hmac_sha256_8way_full( void *digest, const void *K, size_t Klen,
|
||||||
|
const void *in, size_t len )
|
||||||
|
{
|
||||||
|
hmac_sha256_8way_context ctx;
|
||||||
|
hmac_sha256_8way_init( &ctx, K, Klen );
|
||||||
|
hmac_sha256_8way_update( &ctx, in, len );
|
||||||
|
hmac_sha256_8way_close( &ctx, digest );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize an HMAC-SHA256 operation with the given key. */
|
||||||
|
void
|
||||||
|
hmac_sha256_8way_init( hmac_sha256_8way_context *ctx, const void *_K,
|
||||||
|
size_t Klen )
|
||||||
|
{
|
||||||
|
unsigned char pad[64*8] __attribute__ ((aligned (128)));
|
||||||
|
unsigned char khash[32*8] __attribute__ ((aligned (128)));
|
||||||
|
const unsigned char * K = _K;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
/* If Klen > 64, the key is really SHA256(K). */
|
||||||
|
if ( Klen > 64 )
|
||||||
|
{
|
||||||
|
sha256_8way_init( &ctx->ictx );
|
||||||
|
sha256_8way_update( &ctx->ictx, K, Klen );
|
||||||
|
sha256_8way_close( &ctx->ictx, khash );
|
||||||
|
K = khash;
|
||||||
|
Klen = 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||||
|
sha256_8way_init( &ctx->ictx );
|
||||||
|
memset( pad, 0x36, 64*8);
|
||||||
|
|
||||||
|
for ( i = 0; i < Klen/4; i++ )
|
||||||
|
casti_m256i( pad, i ) = _mm256_xor_si256( casti_m256i( pad, i ),
|
||||||
|
casti_m256i( K, i ) );
|
||||||
|
|
||||||
|
sha256_8way_update( &ctx->ictx, pad, 64 );
|
||||||
|
|
||||||
|
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||||
|
sha256_8way_init( &ctx->octx );
|
||||||
|
memset( pad, 0x5c, 64*8 );
|
||||||
|
for ( i = 0; i < Klen/4; i++ )
|
||||||
|
casti_m256i( pad, i ) = _mm256_xor_si256( casti_m256i( pad, i ),
|
||||||
|
casti_m256i( K, i ) );
|
||||||
|
sha256_8way_update( &ctx->octx, pad, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
hmac_sha256_8way_update( hmac_sha256_8way_context *ctx, const void *in,
|
||||||
|
size_t len )
|
||||||
|
{
|
||||||
|
/* Feed data to the inner SHA256 operation. */
|
||||||
|
sha256_8way_update( &ctx->ictx, in, len );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finish an HMAC-SHA256 operation. */
|
||||||
|
void
|
||||||
|
hmac_sha256_8way_close( hmac_sha256_8way_context *ctx, void *digest )
|
||||||
|
{
|
||||||
|
unsigned char ihash[32*8] __attribute__ ((aligned (128)));
|
||||||
|
|
||||||
|
/* Finish the inner SHA256 operation. */
|
||||||
|
sha256_8way_close( &ctx->ictx, ihash );
|
||||||
|
|
||||||
|
/* Feed the inner hash to the outer SHA256 operation. */
|
||||||
|
sha256_8way_update( &ctx->octx, ihash, 32 );
|
||||||
|
|
||||||
|
/* Finish the outer SHA256 operation. */
|
||||||
|
sha256_8way_close( &ctx->octx, digest );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||||
|
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||||
|
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
pbkdf2_sha256_8way( uint8_t *buf, size_t dkLen, const uint8_t *passwd,
|
||||||
|
size_t passwdlen, const uint8_t *salt, size_t saltlen,
|
||||||
|
uint64_t c )
|
||||||
|
{
|
||||||
|
hmac_sha256_8way_context PShctx, hctx;
|
||||||
|
uint8_t _ALIGN(128) T[32*8];
|
||||||
|
uint8_t _ALIGN(128) U[32*8];
|
||||||
|
size_t i, clen;
|
||||||
|
uint64_t j;
|
||||||
|
int k;
|
||||||
|
|
||||||
|
/* Compute HMAC state after processing P and S. */
|
||||||
|
hmac_sha256_8way_init( &PShctx, passwd, passwdlen );
|
||||||
|
|
||||||
|
// saltlen can be odd number of bytes
|
||||||
|
hmac_sha256_8way_update( &PShctx, salt, saltlen );
|
||||||
|
|
||||||
|
/* Iterate through the blocks. */
|
||||||
|
for ( i = 0; i * 32 < dkLen; i++ )
|
||||||
|
{
|
||||||
|
__m256i ivec = _mm256_set1_epi32( bswap_32( i+1 ) );
|
||||||
|
|
||||||
|
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||||
|
memcpy( &hctx, &PShctx, sizeof(hmac_sha256_8way_context) );
|
||||||
|
hmac_sha256_8way_update( &hctx, &ivec, 4 );
|
||||||
|
hmac_sha256_8way_close( &hctx, U );
|
||||||
|
|
||||||
|
/* T_i = U_1 ... */
|
||||||
|
memcpy( T, U, 32*8 );
|
||||||
|
|
||||||
|
for ( j = 2; j <= c; j++ )
|
||||||
|
{
|
||||||
|
/* Compute U_j. */
|
||||||
|
hmac_sha256_8way_init( &hctx, passwd, passwdlen );
|
||||||
|
hmac_sha256_8way_update( &hctx, U, 32 );
|
||||||
|
hmac_sha256_8way_close( &hctx, U );
|
||||||
|
|
||||||
|
/* ... xor U_j ... */
|
||||||
|
for ( k = 0; k < 8; k++ )
|
||||||
|
casti_m256i( T, k ) = _mm256_xor_si256( casti_m256i( T, k ),
|
||||||
|
casti_m256i( U, k ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy as many bytes as necessary into buf. */
|
||||||
|
clen = dkLen - i * 32;
|
||||||
|
if ( clen > 32 )
|
||||||
|
clen = 32;
|
||||||
|
memcpy( &buf[ i*32*8 ], T, clen*8 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
|
||||||
|
// HMAC 16-way AVX512
|
||||||
|
|
||||||
|
void
|
||||||
|
hmac_sha256_16way_full( void *digest, const void *K, size_t Klen,
|
||||||
|
const void *in, size_t len )
|
||||||
|
{
|
||||||
|
hmac_sha256_16way_context ctx;
|
||||||
|
hmac_sha256_16way_init( &ctx, K, Klen );
|
||||||
|
hmac_sha256_16way_update( &ctx, in, len );
|
||||||
|
hmac_sha256_16way_close( &ctx, digest );
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
hmac_sha256_16way_init( hmac_sha256_16way_context *ctx, const void *_K,
|
||||||
|
size_t Klen )
|
||||||
|
{
|
||||||
|
unsigned char pad[64*16] __attribute__ ((aligned (128)));
|
||||||
|
unsigned char khash[32*16] __attribute__ ((aligned (128)));
|
||||||
|
const unsigned char * K = _K;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
/* If Klen > 64, the key is really SHA256(K). */
|
||||||
|
if ( Klen > 64 )
|
||||||
|
{
|
||||||
|
sha256_16way_init( &ctx->ictx );
|
||||||
|
sha256_16way_update( &ctx->ictx, K, Klen );
|
||||||
|
sha256_16way_close( &ctx->ictx, khash );
|
||||||
|
K = khash;
|
||||||
|
Klen = 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||||
|
sha256_16way_init( &ctx->ictx );
|
||||||
|
memset( pad, 0x36, 64*16 );
|
||||||
|
|
||||||
|
for ( i = 0; i < Klen; i++ )
|
||||||
|
casti_m512i( pad, i ) = _mm512_xor_si512( casti_m512i( pad, i ),
|
||||||
|
casti_m512i( K, i ) );
|
||||||
|
sha256_16way_update( &ctx->ictx, pad, 64 );
|
||||||
|
|
||||||
|
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||||
|
sha256_16way_init( &ctx->octx );
|
||||||
|
memset( pad, 0x5c, 64*16 );
|
||||||
|
for ( i = 0; i < Klen/4; i++ )
|
||||||
|
casti_m512i( pad, i ) = _mm512_xor_si512( casti_m512i( pad, i ),
|
||||||
|
casti_m512i( K, i ) );
|
||||||
|
sha256_16way_update( &ctx->octx, pad, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
hmac_sha256_16way_update( hmac_sha256_16way_context *ctx, const void *in,
|
||||||
|
size_t len )
|
||||||
|
{
|
||||||
|
/* Feed data to the inner SHA256 operation. */
|
||||||
|
sha256_16way_update( &ctx->ictx, in, len );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finish an HMAC-SHA256 operation. */
|
||||||
|
void
|
||||||
|
hmac_sha256_16way_close( hmac_sha256_16way_context *ctx, void *digest )
|
||||||
|
{
|
||||||
|
unsigned char ihash[32*16] __attribute__ ((aligned (128)));
|
||||||
|
|
||||||
|
/* Finish the inner SHA256 operation. */
|
||||||
|
sha256_16way_close( &ctx->ictx, ihash );
|
||||||
|
|
||||||
|
/* Feed the inner hash to the outer SHA256 operation. */
|
||||||
|
sha256_16way_update( &ctx->octx, ihash, 32 );
|
||||||
|
|
||||||
|
/* Finish the outer SHA256 operation. */
|
||||||
|
sha256_16way_close( &ctx->octx, digest );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
|
||||||
|
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||||
|
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
pbkdf2_sha256_16way( uint8_t *buf, size_t dkLen,
|
||||||
|
const uint8_t *passwd, size_t passwdlen,
|
||||||
|
const uint8_t *salt, size_t saltlen, uint64_t c )
|
||||||
|
{
|
||||||
|
hmac_sha256_16way_context PShctx, hctx;
|
||||||
|
uint8_t _ALIGN(128) T[32*16];
|
||||||
|
uint8_t _ALIGN(128) U[32*16];
|
||||||
|
__m512i ivec;
|
||||||
|
size_t i, clen;
|
||||||
|
uint64_t j;
|
||||||
|
int k;
|
||||||
|
|
||||||
|
/* Compute HMAC state after processing P and S. */
|
||||||
|
hmac_sha256_16way_init( &PShctx, passwd, passwdlen );
|
||||||
|
hmac_sha256_16way_update( &PShctx, salt, saltlen );
|
||||||
|
|
||||||
|
/* Iterate through the blocks. */
|
||||||
|
for ( i = 0; i * 32 < dkLen; i++ )
|
||||||
|
{
|
||||||
|
/* Generate INT(i + 1). */
|
||||||
|
ivec = _mm512_set1_epi32( bswap_32( i+1 ) );
|
||||||
|
|
||||||
|
/* Compute U_1 = PRF(P, S || INT(i)). */
|
||||||
|
memcpy( &hctx, &PShctx, sizeof(hmac_sha256_16way_context) );
|
||||||
|
hmac_sha256_16way_update( &hctx, &ivec, 4 );
|
||||||
|
hmac_sha256_16way_close( &hctx, U );
|
||||||
|
|
||||||
|
/* T_i = U_1 ... */
|
||||||
|
memcpy( T, U, 32*16 );
|
||||||
|
|
||||||
|
for ( j = 2; j <= c; j++ )
|
||||||
|
{
|
||||||
|
/* Compute U_j. */
|
||||||
|
hmac_sha256_16way_init( &hctx, passwd, passwdlen );
|
||||||
|
hmac_sha256_16way_update( &hctx, U, 32 );
|
||||||
|
hmac_sha256_16way_close( &hctx, U );
|
||||||
|
|
||||||
|
/* ... xor U_j ... */
|
||||||
|
for ( k = 0; k < 8; k++ )
|
||||||
|
casti_m512i( T, k ) = _mm512_xor_si512( casti_m512i( T, k ),
|
||||||
|
casti_m512i( U, k ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy as many bytes as necessary into buf. */
|
||||||
|
clen = dkLen - i * 32;
|
||||||
|
if ( clen > 32 )
|
||||||
|
clen = 32;
|
||||||
|
memcpy( &buf[ i*32*16 ], T, clen*16 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // AVX512
|
||||||
|
#endif // AVX2
|
||||||
|
|
107
algo/sha/hmac-sha256-hash-4way.h
Normal file
107
algo/sha/hmac-sha256-hash-4way.h
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
/*-
|
||||||
|
* Copyright 2005,2007,2009 Colin Percival
|
||||||
|
* Copyright 2020 JayDDee@gmail.com
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef HMAC_SHA256_4WAY_H__
|
||||||
|
#define HMAC_SHA256_4WAY_H__
|
||||||
|
|
||||||
|
|
||||||
|
// Tested only 8-way with null pers
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "simd-utils.h"
|
||||||
|
#include "sha-hash-4way.h"
|
||||||
|
|
||||||
|
// HMAC-SHA256 state built from two 4-way SHA-256 contexts.
// NOTE(review): field names match the scalar HMAC_SHA256_CTX, where ictx is
// the inner hash ( K xor 0x36 || data ) and octx is the outer hash
// ( K xor 0x5c || inner digest ); presumably the same roles apply here.
typedef struct _hmac_sha256_4way_context
|
||||||
|
{
|
||||||
|
sha256_4way_context ictx;   // inner SHA-256 context
|
||||||
|
sha256_4way_context octx;   // outer SHA-256 context
|
||||||
|
} hmac_sha256_4way_context;
|
||||||
|
|
||||||
|
//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
|
||||||
|
void hmac_sha256_4way_init( hmac_sha256_4way_context *, const void *, size_t );
|
||||||
|
void hmac_sha256_4way_update( hmac_sha256_4way_context *, const void *,
|
||||||
|
size_t );
|
||||||
|
void hmac_sha256_4way_close( hmac_sha256_4way_context *, void* );
|
||||||
|
void hmac_sha256_4way_full( void*, const void *, size_t Klen, const void *,
|
||||||
|
size_t len );
|
||||||
|
|
||||||
|
/**
|
||||||
|
* pbkdf2_sha256_4way(buf, dkLen, passwd, passwdlen, salt, saltlen, c):
|
||||||
|
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
|
||||||
|
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
|
||||||
|
*/
|
||||||
|
void pbkdf2_sha256_4way( uint8_t *, size_t, const uint8_t *, size_t,
|
||||||
|
const uint8_t *, size_t, uint64_t );
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
|
// HMAC-SHA256 state built from two 8-way SHA-256 contexts (AVX2 builds only,
// see the enclosing #if).
// NOTE(review): field names match the scalar HMAC_SHA256_CTX, where ictx is
// the inner hash and octx is the outer hash; presumably the same roles apply.
typedef struct _hmac_sha256_8way_context
|
||||||
|
{
|
||||||
|
sha256_8way_context ictx;   // inner SHA-256 context
|
||||||
|
sha256_8way_context octx;   // outer SHA-256 context
|
||||||
|
} hmac_sha256_8way_context;
|
||||||
|
|
||||||
|
//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
|
||||||
|
void hmac_sha256_8way_init( hmac_sha256_8way_context *, const void *, size_t );
|
||||||
|
void hmac_sha256_8way_update( hmac_sha256_8way_context *, const void *,
|
||||||
|
size_t );
|
||||||
|
void hmac_sha256_8way_close( hmac_sha256_8way_context *, void* );
|
||||||
|
void hmac_sha256_8way_full( void*, const void *, size_t Klen, const void *,
|
||||||
|
size_t len );
|
||||||
|
|
||||||
|
void pbkdf2_sha256_8way( uint8_t *, size_t, const uint8_t *, size_t,
|
||||||
|
const uint8_t *, size_t, uint64_t );
|
||||||
|
|
||||||
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
|
||||||
|
// HMAC-SHA256 state built from two 16-way SHA-256 contexts (AVX512 builds
// only, see the enclosing #if).
// NOTE(review): field names match the scalar HMAC_SHA256_CTX, where ictx is
// the inner hash and octx is the outer hash; presumably the same roles apply.
typedef struct _hmac_sha256_16way_context
|
||||||
|
{
|
||||||
|
sha256_16way_context ictx;   // inner SHA-256 context
|
||||||
|
sha256_16way_context octx;   // outer SHA-256 context
|
||||||
|
} hmac_sha256_16way_context;
|
||||||
|
|
||||||
|
//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
|
||||||
|
void hmac_sha256_16way_init( hmac_sha256_16way_context *,
|
||||||
|
const void *, size_t );
|
||||||
|
void hmac_sha256_16way_update( hmac_sha256_16way_context *, const void *,
|
||||||
|
size_t );
|
||||||
|
void hmac_sha256_16way_close( hmac_sha256_16way_context *, void* );
|
||||||
|
void hmac_sha256_16way_full( void*, const void *, size_t Klen, const void *,
|
||||||
|
size_t len );
|
||||||
|
|
||||||
|
void pbkdf2_sha256_16way( uint8_t *, size_t, const uint8_t *, size_t,
|
||||||
|
const uint8_t *, size_t, uint64_t );
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif // AVX512
|
||||||
|
#endif // AVX2
|
||||||
|
|
||||||
|
#endif // HMAC_SHA256_4WAY_H__
|
@@ -81,16 +81,17 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
|
|||||||
|
|
||||||
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
/* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
|
||||||
SHA256_Init( &ctx->ictx );
|
SHA256_Init( &ctx->ictx );
|
||||||
memset( pad, 0x36, 64 );
|
|
||||||
for ( i = 0; i < Klen; i++ )
|
|
||||||
pad[i] ^= K[i];
|
for ( i = 0; i < Klen; i++ ) pad[i] = K[i] ^ 0x36;
|
||||||
|
memset( pad + Klen, 0x36, 64 - Klen );
|
||||||
SHA256_Update( &ctx->ictx, pad, 64 );
|
SHA256_Update( &ctx->ictx, pad, 64 );
|
||||||
|
|
||||||
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
/* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
|
||||||
SHA256_Init( &ctx->octx );
|
SHA256_Init( &ctx->octx );
|
||||||
memset(pad, 0x5c, 64);
|
|
||||||
for ( i = 0; i < Klen; i++ )
|
for ( i = 0; i < Klen; i++ ) pad[i] = K[i] ^ 0x5c;
|
||||||
pad[i] ^= K[i];
|
memset( pad + Klen, 0x5c, 64 - Klen );
|
||||||
SHA256_Update( &ctx->octx, pad, 64 );
|
SHA256_Update( &ctx->octx, pad, 64 );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -161,6 +162,12 @@ PBKDF2_SHA256( const uint8_t *passwd, size_t passwdlen, const uint8_t *salt,
|
|||||||
HMAC_SHA256_Final( U, &hctx );
|
HMAC_SHA256_Final( U, &hctx );
|
||||||
|
|
||||||
/* ... xor U_j ... */
|
/* ... xor U_j ... */
|
||||||
|
// _mm256_xor_si256( *(__m256i*)T, *(__m256i*)U );
|
||||||
|
// _mm_xor_si128( ((__m128i*)T)[0], ((__m128i*)U)[0] );
|
||||||
|
// _mm_xor_si128( ((__m128i*)T)[1], ((__m128i*)U)[1] );
|
||||||
|
|
||||||
|
// for ( k = 0; k < 4; k++ ) T[k] ^= U[k];
|
||||||
|
|
||||||
for ( k = 0; k < 32; k++ )
|
for ( k = 0; k < 32; k++ )
|
||||||
T[k] ^= U[k];
|
T[k] ^= U[k];
|
||||||
}
|
}
|
||||||
|
@@ -58,6 +58,7 @@ void sha256_4way_init( sha256_4way_context *sc );
|
|||||||
void sha256_4way_update( sha256_4way_context *sc, const void *data,
|
void sha256_4way_update( sha256_4way_context *sc, const void *data,
|
||||||
size_t len );
|
size_t len );
|
||||||
void sha256_4way_close( sha256_4way_context *sc, void *dst );
|
void sha256_4way_close( sha256_4way_context *sc, void *dst );
|
||||||
|
void sha256_4way_full( void *dst, const void *data, size_t len );
|
||||||
|
|
||||||
#endif // SSE2
|
#endif // SSE2
|
||||||
|
|
||||||
@@ -75,6 +76,7 @@ typedef struct {
|
|||||||
void sha256_8way_init( sha256_8way_context *sc );
|
void sha256_8way_init( sha256_8way_context *sc );
|
||||||
void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len );
|
void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len );
|
||||||
void sha256_8way_close( sha256_8way_context *sc, void *dst );
|
void sha256_8way_close( sha256_8way_context *sc, void *dst );
|
||||||
|
void sha256_8way_full( void *dst, const void *data, size_t len );
|
||||||
|
|
||||||
#endif // AVX2
|
#endif // AVX2
|
||||||
|
|
||||||
@@ -92,6 +94,7 @@ typedef struct {
|
|||||||
void sha256_16way_init( sha256_16way_context *sc );
|
void sha256_16way_init( sha256_16way_context *sc );
|
||||||
void sha256_16way_update( sha256_16way_context *sc, const void *data, size_t len );
|
void sha256_16way_update( sha256_16way_context *sc, const void *data, size_t len );
|
||||||
void sha256_16way_close( sha256_16way_context *sc, void *dst );
|
void sha256_16way_close( sha256_16way_context *sc, void *dst );
|
||||||
|
void sha256_16way_full( void *dst, const void *data, size_t len );
|
||||||
|
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
|
|
||||||
@@ -110,6 +113,7 @@ void sha512_4way_init( sha512_4way_context *sc);
|
|||||||
void sha512_4way_update( sha512_4way_context *sc, const void *data,
|
void sha512_4way_update( sha512_4way_context *sc, const void *data,
|
||||||
size_t len );
|
size_t len );
|
||||||
void sha512_4way_close( sha512_4way_context *sc, void *dst );
|
void sha512_4way_close( sha512_4way_context *sc, void *dst );
|
||||||
|
void sha512_4way_full( void *dst, const void *data, size_t len );
|
||||||
|
|
||||||
#endif // AVX2
|
#endif // AVX2
|
||||||
|
|
||||||
@@ -128,6 +132,7 @@ void sha512_8way_init( sha512_8way_context *sc);
|
|||||||
void sha512_8way_update( sha512_8way_context *sc, const void *data,
|
void sha512_8way_update( sha512_8way_context *sc, const void *data,
|
||||||
size_t len );
|
size_t len );
|
||||||
void sha512_8way_close( sha512_8way_context *sc, void *dst );
|
void sha512_8way_close( sha512_8way_context *sc, void *dst );
|
||||||
|
void sha512_8way_full( void *dst, const void *data, size_t len );
|
||||||
|
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
|
|
||||||
|
@@ -479,8 +479,8 @@ static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
|
|||||||
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
|
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
|
||||||
const uint32_t *midstate, const uint32_t *prehash);
|
const uint32_t *midstate, const uint32_t *prehash);
|
||||||
|
|
||||||
static inline int scanhash_sha256d_4way(int thr_id, struct work *work,
|
static inline int scanhash_sha256d_4way( struct work *work,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done)
|
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -492,6 +492,7 @@ static inline int scanhash_sha256d_4way(int thr_id, struct work *work,
|
|||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
int thr_id = mythr->id;
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
memcpy(data, pdata + 16, 64);
|
memcpy(data, pdata + 16, 64);
|
||||||
@@ -521,10 +522,8 @@ static inline int scanhash_sha256d_4way(int thr_id, struct work *work,
|
|||||||
if (swab32(hash[4 * 7 + i]) <= Htarg) {
|
if (swab32(hash[4 * 7 + i]) <= Htarg) {
|
||||||
pdata[19] = data[4 * 3 + i];
|
pdata[19] = data[4 * 3 + i];
|
||||||
sha256d_80_swap(hash, pdata);
|
sha256d_80_swap(hash, pdata);
|
||||||
if (fulltest(hash, ptarget)) {
|
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||||
*hashes_done = n - first_nonce + 1;
|
submit_solution( work, hash, mythr );
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||||
@@ -541,8 +540,8 @@ static inline int scanhash_sha256d_4way(int thr_id, struct work *work,
|
|||||||
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
|
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
|
||||||
const uint32_t *midstate, const uint32_t *prehash);
|
const uint32_t *midstate, const uint32_t *prehash);
|
||||||
|
|
||||||
static inline int scanhash_sha256d_8way(int thr_id, struct work *work,
|
static inline int scanhash_sha256d_8way( struct work *work,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done)
|
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -554,6 +553,7 @@ static inline int scanhash_sha256d_8way(int thr_id, struct work *work,
|
|||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
int thr_id = mythr->id;
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
memcpy(data, pdata + 16, 64);
|
memcpy(data, pdata + 16, 64);
|
||||||
@@ -583,10 +583,8 @@ static inline int scanhash_sha256d_8way(int thr_id, struct work *work,
|
|||||||
if (swab32(hash[8 * 7 + i]) <= Htarg) {
|
if (swab32(hash[8 * 7 + i]) <= Htarg) {
|
||||||
pdata[19] = data[8 * 3 + i];
|
pdata[19] = data[8 * 3 + i];
|
||||||
sha256d_80_swap(hash, pdata);
|
sha256d_80_swap(hash, pdata);
|
||||||
if (fulltest(hash, ptarget)) {
|
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||||
*hashes_done = n - first_nonce + 1;
|
submit_solution( work, hash, mythr );
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||||
@@ -614,13 +612,11 @@ int scanhash_sha256d( struct work *work,
|
|||||||
|
|
||||||
#ifdef HAVE_SHA256_8WAY
|
#ifdef HAVE_SHA256_8WAY
|
||||||
if (sha256_use_8way())
|
if (sha256_use_8way())
|
||||||
return scanhash_sha256d_8way(thr_id, work,
|
return scanhash_sha256d_8way( work, max_nonce, hashes_done, mythr );
|
||||||
max_nonce, hashes_done);
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAVE_SHA256_4WAY
|
#ifdef HAVE_SHA256_4WAY
|
||||||
if (sha256_use_4way())
|
if (sha256_use_4way())
|
||||||
return scanhash_sha256d_4way(thr_id, work,
|
return scanhash_sha256d_4way( work, max_nonce, hashes_done, mythr );
|
||||||
max_nonce, hashes_done);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
memcpy(data, pdata + 16, 64);
|
memcpy(data, pdata + 16, 64);
|
||||||
@@ -657,7 +653,7 @@ int scanhash_SHA256d( struct work *work, const uint32_t max_nonce,
|
|||||||
uint32_t n = pdata[19] - 1;
|
uint32_t n = pdata[19] - 1;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
int thr_id = mythr->id;
|
||||||
|
|
||||||
memcpy( data, pdata, 80 );
|
memcpy( data, pdata, 80 );
|
||||||
|
|
||||||
|
@@ -330,6 +330,14 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
|||||||
mm128_block_bswap_32( dst, sc->val );
|
mm128_block_bswap_32( dst, sc->val );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void sha256_4way_full( void *dst, const void *data, size_t len )
|
||||||
|
{
|
||||||
|
sha256_4way_context ctx;
|
||||||
|
sha256_4way_init( &ctx );
|
||||||
|
sha256_4way_update( &ctx, data, len );
|
||||||
|
sha256_4way_close( &ctx, dst );
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
// SHA-256 8 way
|
// SHA-256 8 way
|
||||||
@@ -498,6 +506,10 @@ void sha256_8way_init( sha256_8way_context *sc )
|
|||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// need to handle odd byte length for yespower.
|
||||||
|
// Assume only last update is odd.
|
||||||
|
|
||||||
void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len )
|
void sha256_8way_update( sha256_8way_context *sc, const void *data, size_t len )
|
||||||
{
|
{
|
||||||
__m256i *vdata = (__m256i*)data;
|
__m256i *vdata = (__m256i*)data;
|
||||||
@@ -564,6 +576,13 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
|
|||||||
mm256_block_bswap_32( dst, sc->val );
|
mm256_block_bswap_32( dst, sc->val );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void sha256_8way_full( void *dst, const void *data, size_t len )
|
||||||
|
{
|
||||||
|
sha256_8way_context ctx;
|
||||||
|
sha256_8way_init( &ctx );
|
||||||
|
sha256_8way_update( &ctx, data, len );
|
||||||
|
sha256_8way_close( &ctx, dst );
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
|
|
||||||
@@ -791,6 +810,14 @@ void sha256_16way_close( sha256_16way_context *sc, void *dst )
|
|||||||
mm512_block_bswap_32( dst, sc->val );
|
mm512_block_bswap_32( dst, sc->val );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void sha256_16way_full( void *dst, const void *data, size_t len )
|
||||||
|
{
|
||||||
|
sha256_16way_context ctx;
|
||||||
|
sha256_16way_init( &ctx );
|
||||||
|
sha256_16way_update( &ctx, data, len );
|
||||||
|
sha256_16way_close( &ctx, dst );
|
||||||
|
}
|
||||||
|
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
#endif // __AVX2__
|
#endif // __AVX2__
|
||||||
#endif // __SSE2__
|
#endif // __SSE2__
|
||||||
|
@@ -85,7 +85,7 @@ int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
@@ -173,7 +173,7 @@ int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
@@ -78,7 +78,7 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
@@ -161,7 +161,7 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
@@ -33,7 +33,7 @@
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#ifdef __AES__
|
#if defined(__AES__)
|
||||||
|
|
||||||
#include "sph_shavite.h"
|
#include "sph_shavite.h"
|
||||||
#include "simd-utils.h"
|
#include "simd-utils.h"
|
||||||
|
@@ -35,6 +35,8 @@
|
|||||||
|
|
||||||
#include "sph_shavite.h"
|
#include "sph_shavite.h"
|
||||||
|
|
||||||
|
#if !defined(__AES__)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C"{
|
extern "C"{
|
||||||
#endif
|
#endif
|
||||||
@@ -1762,3 +1764,6 @@ sph_shavite512_sw_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst
|
|||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif // !AES
|
||||||
|
|
||||||
|
@@ -262,15 +262,9 @@ void sph_shavite384_close(void *cc, void *dst);
|
|||||||
void sph_shavite384_addbits_and_close(
|
void sph_shavite384_addbits_and_close(
|
||||||
void *cc, unsigned ub, unsigned n, void *dst);
|
void *cc, unsigned ub, unsigned n, void *dst);
|
||||||
|
|
||||||
// Always define sw but only define aesni when available
|
//Don't call these directly from application code, use the macros below.
|
||||||
// Define fptrs for aesni or sw, not both.
|
|
||||||
void sph_shavite512_sw_init(void *cc);
|
|
||||||
void sph_shavite512_sw(void *cc, const void *data, size_t len);
|
|
||||||
void sph_shavite512_sw_close(void *cc, void *dst);
|
|
||||||
void sph_shavite512_sw_addbits_and_close(
|
|
||||||
void *cc, unsigned ub, unsigned n, void *dst);
|
|
||||||
|
|
||||||
#ifdef __AES__
|
#ifdef __AES__
|
||||||
|
|
||||||
void sph_shavite512_aesni_init(void *cc);
|
void sph_shavite512_aesni_init(void *cc);
|
||||||
void sph_shavite512_aesni(void *cc, const void *data, size_t len);
|
void sph_shavite512_aesni(void *cc, const void *data, size_t len);
|
||||||
void sph_shavite512_aesni_close(void *cc, void *dst);
|
void sph_shavite512_aesni_close(void *cc, void *dst);
|
||||||
@@ -285,6 +279,13 @@ void sph_shavite512_aesni_addbits_and_close(
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
void sph_shavite512_sw_init(void *cc);
|
||||||
|
void sph_shavite512_sw(void *cc, const void *data, size_t len);
|
||||||
|
void sph_shavite512_sw_close(void *cc, void *dst);
|
||||||
|
void sph_shavite512_sw_addbits_and_close(
|
||||||
|
void *cc, unsigned ub, unsigned n, void *dst);
|
||||||
|
|
||||||
|
|
||||||
#define sph_shavite512_init sph_shavite512_sw_init
|
#define sph_shavite512_init sph_shavite512_sw_init
|
||||||
#define sph_shavite512 sph_shavite512_sw
|
#define sph_shavite512 sph_shavite512_sw
|
||||||
#define sph_shavite512_close sph_shavite512_sw_close
|
#define sph_shavite512_close sph_shavite512_sw_close
|
||||||
@@ -293,6 +294,20 @@ void sph_shavite512_aesni_addbits_and_close(
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Use these macros from application code.
|
||||||
|
#define shavite512_context sph_shavite512_context
|
||||||
|
|
||||||
|
#define shavite512_init sph_shavite512_init
|
||||||
|
#define shavite512_update sph_shavite512
|
||||||
|
#define shavite512_close sph_shavite512_close
|
||||||
|
|
||||||
|
// One-shot helper: runs shavite512_init, shavite512_update and
// shavite512_close in sequence on the caller supplied context `cc`, passing
// `data`/`len` to the update step and `dst` to the close step.
// Expands to a single statement; note that `cc` is evaluated three times.
#define shavite512_full( cc, dst, data, len ) \
   do { \
      shavite512_init( (cc) ); \
      shavite512_update( (cc), (data), (len) ); \
      shavite512_close( (cc), (dst) ); \
   } while ( 0 )
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@@ -13,18 +13,18 @@
|
|||||||
|
|
||||||
#if defined (SKEIN_8WAY)
|
#if defined (SKEIN_8WAY)
|
||||||
|
|
||||||
|
static __thread skein512_8way_context skein512_8way_ctx
|
||||||
|
__attribute__ ((aligned (64)));
|
||||||
|
|
||||||
void skeinhash_8way( void *state, const void *input )
|
void skeinhash_8way( void *state, const void *input )
|
||||||
{
|
{
|
||||||
uint64_t vhash64[8*8] __attribute__ ((aligned (128)));
|
uint64_t vhash64[8*8] __attribute__ ((aligned (128)));
|
||||||
skein512_8way_context ctx_skein;
|
skein512_8way_context ctx_skein;
|
||||||
|
memcpy( &ctx_skein, &skein512_8way_ctx, sizeof( ctx_skein ) );
|
||||||
uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
|
uint32_t vhash32[16*8] __attribute__ ((aligned (128)));
|
||||||
sha256_8way_context ctx_sha256;
|
sha256_8way_context ctx_sha256;
|
||||||
|
|
||||||
skein512_8way_init( &ctx_skein );
|
skein512_8way_final16( &ctx_skein, vhash64, input + (64*8) );
|
||||||
skein512_8way_update( &ctx_skein, input, 80 );
|
|
||||||
skein512_8way_close( &ctx_skein, vhash64 );
|
|
||||||
|
|
||||||
rintrlv_8x64_8x32( vhash32, vhash64, 512 );
|
rintrlv_8x64_8x32( vhash32, vhash64, 512 );
|
||||||
|
|
||||||
sha256_8way_init( &ctx_sha256 );
|
sha256_8way_init( &ctx_sha256 );
|
||||||
@@ -36,49 +36,58 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
|||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t vdata[20*8] __attribute__ ((aligned (128)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t hash[16*8] __attribute__ ((aligned (64)));
|
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &(hash[7<<3]);
|
uint32_t *hash_d7 = &(hash[7*8]);
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t targ_d7 = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 8;
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
__m512i *noncev = (__m512i*)vdata + 9;
|
||||||
int thr_id = mythr->id;
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||||
|
*noncev = mm512_intrlv_blend_32(
|
||||||
|
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||||
|
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||||
|
skein512_8way_prehash64( &skein512_8way_ctx, vdata );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
|
||||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
|
||||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
|
||||||
|
|
||||||
skeinhash_8way( hash, vdata );
|
skeinhash_8way( hash, vdata );
|
||||||
|
|
||||||
for ( int lane = 0; lane < 8; lane++ )
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
if ( hash7[ lane ] <= Htarg )
|
if ( unlikely( hash_d7[ lane ] <= targ_d7 ) && !bench )
|
||||||
{
|
{
|
||||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
|
m512_const1_64( 0x0000000800000000 ) );
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
|
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||||
|
|
||||||
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce;
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined (SKEIN_4WAY)
|
#elif defined (SKEIN_4WAY)
|
||||||
|
|
||||||
|
static __thread skein512_4way_context skein512_4way_ctx
|
||||||
|
__attribute__ ((aligned (64)));
|
||||||
|
|
||||||
void skeinhash_4way( void *state, const void *input )
|
void skeinhash_4way( void *state, const void *input )
|
||||||
{
|
{
|
||||||
uint64_t vhash64[8*4] __attribute__ ((aligned (128)));
|
uint64_t vhash64[8*4] __attribute__ ((aligned (128)));
|
||||||
skein512_4way_context ctx_skein;
|
skein512_4way_context ctx_skein;
|
||||||
|
memcpy( &ctx_skein, &skein512_4way_ctx, sizeof( ctx_skein ) );
|
||||||
#if defined(__SHA__)
|
#if defined(__SHA__)
|
||||||
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
uint32_t hash0[16] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
uint32_t hash1[16] __attribute__ ((aligned (64)));
|
||||||
@@ -90,9 +99,7 @@ void skeinhash_4way( void *state, const void *input )
|
|||||||
sha256_4way_context ctx_sha256;
|
sha256_4way_context ctx_sha256;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
skein512_4way_init( &ctx_skein );
|
skein512_4way_final16( &ctx_skein, vhash64, input + (64*4) );
|
||||||
skein512_4way_update( &ctx_skein, input, 80 );
|
|
||||||
skein512_4way_close( &ctx_skein, vhash64 );
|
|
||||||
|
|
||||||
#if defined(__SHA__)
|
#if defined(__SHA__)
|
||||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 512 );
|
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 512 );
|
||||||
@@ -127,38 +134,43 @@ int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
|
|||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t *hash7 = &(hash[7<<2]);
|
uint32_t *hash_d7 = &(hash[7<<2]);
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t targ_d7 = ptarget[7];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 4;
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
__m256i *noncev = (__m256i*)vdata + 9;
|
||||||
int thr_id = mythr->id;
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||||
|
skein512_4way_prehash64( &skein512_4way_ctx, vdata );
|
||||||
|
|
||||||
|
*noncev = mm256_intrlv_blend_32(
|
||||||
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
|
||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
|
||||||
|
|
||||||
skeinhash_4way( hash, vdata );
|
skeinhash_4way( hash, vdata );
|
||||||
|
|
||||||
for ( int lane = 0; lane < 4; lane++ )
|
for ( int lane = 0; lane < 4; lane++ )
|
||||||
if ( hash7[ lane ] <= Htarg )
|
if ( unlikely( ( hash_d7[ lane ] <= targ_d7 ) && !bench ) )
|
||||||
{
|
{
|
||||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( valid_hash( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
|
m256_const1_64( 0x0000000400000000 ) );
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart );
|
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||||
|
|
||||||
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce;
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@@ -4,14 +4,16 @@
|
|||||||
|
|
||||||
bool register_skein_algo( algo_gate_t* gate )
|
bool register_skein_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
|
||||||
#if defined (SKEIN_8WAY)
|
#if defined (SKEIN_8WAY)
|
||||||
|
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_skein_8way;
|
gate->scanhash = (void*)&scanhash_skein_8way;
|
||||||
gate->hash = (void*)&skeinhash_8way;
|
gate->hash = (void*)&skeinhash_8way;
|
||||||
#elif defined (SKEIN_4WAY)
|
#elif defined (SKEIN_4WAY)
|
||||||
|
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_skein_4way;
|
gate->scanhash = (void*)&scanhash_skein_4way;
|
||||||
gate->hash = (void*)&skeinhash_4way;
|
gate->hash = (void*)&skeinhash_4way;
|
||||||
#else
|
#else
|
||||||
|
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_skein;
|
gate->scanhash = (void*)&scanhash_skein;
|
||||||
gate->hash = (void*)&skeinhash;
|
gate->hash = (void*)&skeinhash;
|
||||||
#endif
|
#endif
|
||||||
|
@@ -654,6 +654,160 @@ skein_big_close_8way( skein512_8way_context *sc, unsigned ub, unsigned n,
|
|||||||
memcpy_512( dst, buf, out_len >> 3 );
|
memcpy_512( dst, buf, out_len >> 3 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// One-shot 8-way Skein-512: performs the init, update and close steps in a
// single call. Only sc->buf is used from the context (as the block buffer);
// the chaining values are kept in the locals h0..h7 and are written to `out`
// as eight __m512i vectors.
// `len` is a byte count per lane. Data is copied in whole __m512i vectors
// ( len>>3 ), so a length that is not a multiple of 8 is truncated --
// NOTE(review): presumably callers always pass a multiple of 8, confirm.
// UBI_BIG_8WAY is a macro defined elsewhere in this file; presumably it
// processes the block in buf using bcount and updates h0..h7 -- confirm
// against its definition.
void skein512_8way_full( skein512_8way_context *sc, void *out, const void *data,
|
||||||
|
size_t len )
|
||||||
|
{
|
||||||
|
__m512i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||||
|
__m512i *vdata = (__m512i*)data;
|
||||||
|
__m512i *buf = sc->buf;
|
||||||
|
size_t ptr = 0;
|
||||||
|
unsigned first;
|
||||||
|
uint64_t bcount = 0;
|
||||||
|
const int buf_size = 64; // block size in bytes per lane ( buf_size>>3 = 8 __m512i vectors )
|
||||||
|
|
||||||
|
// Init: load the initial chaining values, one constant broadcast per word.
|
||||||
|
|
||||||
|
h0 = m512_const1_64( 0x4903ADFF749C51CE );
|
||||||
|
h1 = m512_const1_64( 0x0D95DE399746DF03 );
|
||||||
|
h2 = m512_const1_64( 0x8FD1934127C79BCE );
|
||||||
|
h3 = m512_const1_64( 0x9A255629FF352CB1 );
|
||||||
|
h4 = m512_const1_64( 0x5DB62599DF6CA7B0 );
|
||||||
|
h5 = m512_const1_64( 0xEABE394CA9D5C3F4 );
|
||||||
|
h6 = m512_const1_64( 0x991112C71A75B523 );
|
||||||
|
h7 = m512_const1_64( 0xAE18A40B660FCC33 );
|
||||||
|
|
||||||
|
// Update: input that fits in one block is only buffered; longer input is
|
||||||
|
// buffered block by block, processing a block each time the buffer is full.
|
||||||
|
|
||||||
|
if ( len <= buf_size - ptr )
|
||||||
|
{
|
||||||
|
memcpy_512( buf + (ptr>>3), vdata, len>>3 );
|
||||||
|
ptr += len;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
first = ( bcount == 0 ) << 7;   // 128 for the first processed block, cleared afterwards
|
||||||
|
do {
|
||||||
|
size_t clen;
|
||||||
|
|
||||||
|
if ( ptr == buf_size )   // buffer full: process it before copying more
|
||||||
|
{
|
||||||
|
bcount ++;
|
||||||
|
UBI_BIG_8WAY( 96 + first, 0 );
|
||||||
|
first = 0;
|
||||||
|
ptr = 0;
|
||||||
|
}
|
||||||
|
clen = buf_size - ptr;   // room left in the buffer, capped to the remaining input
|
||||||
|
if ( clen > len )
|
||||||
|
clen = len;
|
||||||
|
memcpy_512( buf + (ptr>>3), vdata, clen>>3 );
|
||||||
|
ptr += clen;
|
||||||
|
vdata += (clen>>3);
|
||||||
|
len -= clen;
|
||||||
|
} while ( len > 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close: zero pad the last block and process it, then run one more
|
||||||
|
// UBI_BIG_8WAY over a zeroed buffer before storing h0..h7.
|
||||||
|
|
||||||
|
unsigned et;
|
||||||
|
|
||||||
|
memset_zero_512( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||||
|
et = 352 + ((bcount == 0) << 7);   // adds 128 when no full block was processed earlier
|
||||||
|
UBI_BIG_8WAY( et, ptr );
|
||||||
|
|
||||||
|
memset_zero_512( buf, buf_size >> 3 );
|
||||||
|
bcount = 0;
|
||||||
|
UBI_BIG_8WAY( 510, 8 );   // NOTE(review): presumably the Skein output transform -- confirm
|
||||||
|
|
||||||
|
casti_m512i( out, 0 ) = h0;
|
||||||
|
casti_m512i( out, 1 ) = h1;
|
||||||
|
casti_m512i( out, 2 ) = h2;
|
||||||
|
casti_m512i( out, 3 ) = h3;
|
||||||
|
casti_m512i( out, 4 ) = h4;
|
||||||
|
casti_m512i( out, 5 ) = h5;
|
||||||
|
casti_m512i( out, 6 ) = h6;
|
||||||
|
casti_m512i( out, 7 ) = h7;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
skein512_8way_prehash64( skein512_8way_context *sc, const void *data )
|
||||||
|
{
|
||||||
|
__m512i *vdata = (__m512i*)data;
|
||||||
|
__m512i *buf = sc->buf;
|
||||||
|
buf[0] = vdata[0];
|
||||||
|
buf[1] = vdata[1];
|
||||||
|
buf[2] = vdata[2];
|
||||||
|
buf[3] = vdata[3];
|
||||||
|
buf[4] = vdata[4];
|
||||||
|
buf[5] = vdata[5];
|
||||||
|
buf[6] = vdata[6];
|
||||||
|
buf[7] = vdata[7];
|
||||||
|
register __m512i h0 = m512_const1_64( 0x4903ADFF749C51CE );
|
||||||
|
register __m512i h1 = m512_const1_64( 0x0D95DE399746DF03 );
|
||||||
|
register __m512i h2 = m512_const1_64( 0x8FD1934127C79BCE );
|
||||||
|
register __m512i h3 = m512_const1_64( 0x9A255629FF352CB1 );
|
||||||
|
register __m512i h4 = m512_const1_64( 0x5DB62599DF6CA7B0 );
|
||||||
|
register __m512i h5 = m512_const1_64( 0xEABE394CA9D5C3F4 );
|
||||||
|
register __m512i h6 = m512_const1_64( 0x991112C71A75B523 );
|
||||||
|
register __m512i h7 = m512_const1_64( 0xAE18A40B660FCC33 );
|
||||||
|
uint64_t bcount = 1;
|
||||||
|
|
||||||
|
UBI_BIG_8WAY( 224, 0 );
|
||||||
|
sc->h0 = h0;
|
||||||
|
sc->h1 = h1;
|
||||||
|
sc->h2 = h2;
|
||||||
|
sc->h3 = h3;
|
||||||
|
sc->h4 = h4;
|
||||||
|
sc->h5 = h5;
|
||||||
|
sc->h6 = h6;
|
||||||
|
sc->h7 = h7;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
skein512_8way_final16( skein512_8way_context *sc, void *output,
|
||||||
|
const void *data )
|
||||||
|
{
|
||||||
|
__m512i *in = (__m512i*)data;
|
||||||
|
__m512i *buf = sc->buf;
|
||||||
|
__m512i *out = (__m512i*)output;
|
||||||
|
register __m512i h0 = sc->h0;
|
||||||
|
register __m512i h1 = sc->h1;
|
||||||
|
register __m512i h2 = sc->h2;
|
||||||
|
register __m512i h3 = sc->h3;
|
||||||
|
register __m512i h4 = sc->h4;
|
||||||
|
register __m512i h5 = sc->h5;
|
||||||
|
register __m512i h6 = sc->h6;
|
||||||
|
register __m512i h7 = sc->h7;
|
||||||
|
|
||||||
|
const __m512i zero = m512_zero;
|
||||||
|
buf[0] = in[0];
|
||||||
|
buf[1] = in[1];
|
||||||
|
buf[2] = zero;
|
||||||
|
buf[3] = zero;
|
||||||
|
buf[4] = zero;
|
||||||
|
buf[5] = zero;
|
||||||
|
buf[6] = zero;
|
||||||
|
buf[7] = zero;
|
||||||
|
|
||||||
|
uint64_t bcount = 1;
|
||||||
|
UBI_BIG_8WAY( 352, 16 );
|
||||||
|
|
||||||
|
buf[0] = zero;
|
||||||
|
buf[1] = zero;
|
||||||
|
|
||||||
|
bcount = 0;
|
||||||
|
UBI_BIG_8WAY( 510, 8 );
|
||||||
|
|
||||||
|
out[0] = h0;
|
||||||
|
out[1] = h1;
|
||||||
|
out[2] = h2;
|
||||||
|
out[3] = h3;
|
||||||
|
out[4] = h4;
|
||||||
|
out[5] = h5;
|
||||||
|
out[6] = h6;
|
||||||
|
out[7] = h7;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
skein256_8way_update(void *cc, const void *data, size_t len)
|
skein256_8way_update(void *cc, const void *data, size_t len)
|
||||||
{
|
{
|
||||||
@@ -709,6 +863,7 @@ void skein512_4way_init( skein512_4way_context *sc )
|
|||||||
sc->ptr = 0;
|
sc->ptr = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Do not use for 128 bit data length
|
||||||
static void
|
static void
|
||||||
skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
skein_big_core_4way( skein512_4way_context *sc, const void *data,
|
||||||
size_t len )
|
size_t len )
|
||||||
@@ -794,6 +949,157 @@ skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
|
|||||||
memcpy_256( dst, buf, out_len >> 3 );
|
memcpy_256( dst, buf, out_len >> 3 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
skein512_4way_full( skein512_4way_context *sc, void *out, const void *data,
|
||||||
|
size_t len )
|
||||||
|
{
|
||||||
|
__m256i h0, h1, h2, h3, h4, h5, h6, h7;
|
||||||
|
__m256i *vdata = (__m256i*)data;
|
||||||
|
__m256i *buf = sc->buf;
|
||||||
|
size_t ptr = 0;
|
||||||
|
unsigned first;
|
||||||
|
const int buf_size = 64; // 64 * __m256i
|
||||||
|
uint64_t bcount = 0;
|
||||||
|
|
||||||
|
h0 = m256_const1_64( 0x4903ADFF749C51CE );
|
||||||
|
h1 = m256_const1_64( 0x0D95DE399746DF03 );
|
||||||
|
h2 = m256_const1_64( 0x8FD1934127C79BCE );
|
||||||
|
h3 = m256_const1_64( 0x9A255629FF352CB1 );
|
||||||
|
h4 = m256_const1_64( 0x5DB62599DF6CA7B0 );
|
||||||
|
h5 = m256_const1_64( 0xEABE394CA9D5C3F4 );
|
||||||
|
h6 = m256_const1_64( 0x991112C71A75B523 );
|
||||||
|
h7 = m256_const1_64( 0xAE18A40B660FCC33 );
|
||||||
|
|
||||||
|
// Update
|
||||||
|
|
||||||
|
if ( len <= buf_size - ptr )
|
||||||
|
{
|
||||||
|
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||||
|
ptr += len;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
first = ( bcount == 0 ) << 7;
|
||||||
|
do {
|
||||||
|
size_t clen;
|
||||||
|
|
||||||
|
if ( ptr == buf_size )
|
||||||
|
{
|
||||||
|
bcount ++;
|
||||||
|
UBI_BIG_4WAY( 96 + first, 0 );
|
||||||
|
first = 0;
|
||||||
|
ptr = 0;
|
||||||
|
}
|
||||||
|
clen = buf_size - ptr;
|
||||||
|
if ( clen > len )
|
||||||
|
clen = len;
|
||||||
|
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||||
|
ptr += clen;
|
||||||
|
vdata += (clen>>3);
|
||||||
|
len -= clen;
|
||||||
|
} while ( len > 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close
|
||||||
|
|
||||||
|
unsigned et;
|
||||||
|
|
||||||
|
memset_zero_256( buf + (ptr>>3), (buf_size - ptr) >> 3 );
|
||||||
|
et = 352 + ((bcount == 0) << 7);
|
||||||
|
UBI_BIG_4WAY( et, ptr );
|
||||||
|
|
||||||
|
memset_zero_256( buf, buf_size >> 3 );
|
||||||
|
bcount = 0;
|
||||||
|
UBI_BIG_4WAY( 510, 8 );
|
||||||
|
|
||||||
|
casti_m256i( out, 0 ) = h0;
|
||||||
|
casti_m256i( out, 1 ) = h1;
|
||||||
|
casti_m256i( out, 2 ) = h2;
|
||||||
|
casti_m256i( out, 3 ) = h3;
|
||||||
|
casti_m256i( out, 4 ) = h4;
|
||||||
|
casti_m256i( out, 5 ) = h5;
|
||||||
|
casti_m256i( out, 6 ) = h6;
|
||||||
|
casti_m256i( out, 7 ) = h7;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
skein512_4way_prehash64( skein512_4way_context *sc, const void *data )
|
||||||
|
{
|
||||||
|
__m256i *vdata = (__m256i*)data;
|
||||||
|
__m256i *buf = sc->buf;
|
||||||
|
buf[0] = vdata[0];
|
||||||
|
buf[1] = vdata[1];
|
||||||
|
buf[2] = vdata[2];
|
||||||
|
buf[3] = vdata[3];
|
||||||
|
buf[4] = vdata[4];
|
||||||
|
buf[5] = vdata[5];
|
||||||
|
buf[6] = vdata[6];
|
||||||
|
buf[7] = vdata[7];
|
||||||
|
register __m256i h0 = m256_const1_64( 0x4903ADFF749C51CE );
|
||||||
|
register __m256i h1 = m256_const1_64( 0x0D95DE399746DF03 );
|
||||||
|
register __m256i h2 = m256_const1_64( 0x8FD1934127C79BCE );
|
||||||
|
register __m256i h3 = m256_const1_64( 0x9A255629FF352CB1 );
|
||||||
|
register __m256i h4 = m256_const1_64( 0x5DB62599DF6CA7B0 );
|
||||||
|
register __m256i h5 = m256_const1_64( 0xEABE394CA9D5C3F4 );
|
||||||
|
register __m256i h6 = m256_const1_64( 0x991112C71A75B523 );
|
||||||
|
register __m256i h7 = m256_const1_64( 0xAE18A40B660FCC33 );
|
||||||
|
uint64_t bcount = 1;
|
||||||
|
|
||||||
|
UBI_BIG_4WAY( 224, 0 );
|
||||||
|
sc->h0 = h0;
|
||||||
|
sc->h1 = h1;
|
||||||
|
sc->h2 = h2;
|
||||||
|
sc->h3 = h3;
|
||||||
|
sc->h4 = h4;
|
||||||
|
sc->h5 = h5;
|
||||||
|
sc->h6 = h6;
|
||||||
|
sc->h7 = h7;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
skein512_4way_final16( skein512_4way_context *sc, void *out, const void *data )
|
||||||
|
{
|
||||||
|
__m256i *vdata = (__m256i*)data;
|
||||||
|
__m256i *buf = sc->buf;
|
||||||
|
register __m256i h0 = sc->h0;
|
||||||
|
register __m256i h1 = sc->h1;
|
||||||
|
register __m256i h2 = sc->h2;
|
||||||
|
register __m256i h3 = sc->h3;
|
||||||
|
register __m256i h4 = sc->h4;
|
||||||
|
register __m256i h5 = sc->h5;
|
||||||
|
register __m256i h6 = sc->h6;
|
||||||
|
register __m256i h7 = sc->h7;
|
||||||
|
|
||||||
|
const __m256i zero = m256_zero;
|
||||||
|
buf[0] = vdata[0];
|
||||||
|
buf[1] = vdata[1];
|
||||||
|
buf[2] = zero;
|
||||||
|
buf[3] = zero;
|
||||||
|
buf[4] = zero;
|
||||||
|
buf[5] = zero;
|
||||||
|
buf[6] = zero;
|
||||||
|
buf[7] = zero;
|
||||||
|
|
||||||
|
uint64_t bcount = 1;
|
||||||
|
UBI_BIG_4WAY( 352, 16 );
|
||||||
|
|
||||||
|
buf[0] = zero;
|
||||||
|
buf[1] = zero;
|
||||||
|
|
||||||
|
bcount = 0;
|
||||||
|
UBI_BIG_4WAY( 510, 8 );
|
||||||
|
|
||||||
|
casti_m256i( out, 0 ) = h0;
|
||||||
|
casti_m256i( out, 1 ) = h1;
|
||||||
|
casti_m256i( out, 2 ) = h2;
|
||||||
|
casti_m256i( out, 3 ) = h3;
|
||||||
|
casti_m256i( out, 4 ) = h4;
|
||||||
|
casti_m256i( out, 5 ) = h5;
|
||||||
|
casti_m256i( out, 6 ) = h6;
|
||||||
|
casti_m256i( out, 7 ) = h7;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Broken for 80 bytes, use prehash.
|
||||||
void
|
void
|
||||||
skein256_4way_update(void *cc, const void *data, size_t len)
|
skein256_4way_update(void *cc, const void *data, size_t len)
|
||||||
{
|
{
|
||||||
@@ -806,6 +1112,9 @@ skein256_4way_close(void *cc, void *dst)
|
|||||||
skein_big_close_4way(cc, 0, 0, dst, 32);
|
skein_big_close_4way(cc, 0, 0, dst, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Do not use with 128 bit data
|
||||||
void
|
void
|
||||||
skein512_4way_update(void *cc, const void *data, size_t len)
|
skein512_4way_update(void *cc, const void *data, size_t len)
|
||||||
{
|
{
|
||||||
|
@@ -63,10 +63,16 @@ typedef struct
|
|||||||
typedef skein_8way_big_context skein512_8way_context;
|
typedef skein_8way_big_context skein512_8way_context;
|
||||||
typedef skein_8way_big_context skein256_8way_context;
|
typedef skein_8way_big_context skein256_8way_context;
|
||||||
|
|
||||||
|
void skein512_8way_full( skein512_8way_context *sc, void *out,
|
||||||
|
const void *data, size_t len );
|
||||||
void skein512_8way_init( skein512_8way_context *sc );
|
void skein512_8way_init( skein512_8way_context *sc );
|
||||||
void skein512_8way_update( void *cc, const void *data, size_t len );
|
void skein512_8way_update( void *cc, const void *data, size_t len );
|
||||||
void skein512_8way_close( void *cc, void *dst );
|
void skein512_8way_close( void *cc, void *dst );
|
||||||
|
|
||||||
|
void skein512_8way_prehash64( skein512_8way_context *sc, const void *data );
|
||||||
|
void skein512_8way_final16( skein512_8way_context *sc, void *out,
|
||||||
|
const void *data );
|
||||||
|
|
||||||
void skein256_8way_init( skein256_8way_context *sc );
|
void skein256_8way_init( skein256_8way_context *sc );
|
||||||
void skein256_8way_update( void *cc, const void *data, size_t len );
|
void skein256_8way_update( void *cc, const void *data, size_t len );
|
||||||
void skein256_8way_close( void *cc, void *dst );
|
void skein256_8way_close( void *cc, void *dst );
|
||||||
@@ -85,6 +91,8 @@ typedef skein_4way_big_context skein512_4way_context;
|
|||||||
typedef skein_4way_big_context skein256_4way_context;
|
typedef skein_4way_big_context skein256_4way_context;
|
||||||
|
|
||||||
void skein512_4way_init( skein512_4way_context *sc );
|
void skein512_4way_init( skein512_4way_context *sc );
|
||||||
|
void skein512_4way_full( skein512_4way_context *sc, void *out,
|
||||||
|
const void *data, size_t len );
|
||||||
void skein512_4way_update( void *cc, const void *data, size_t len );
|
void skein512_4way_update( void *cc, const void *data, size_t len );
|
||||||
void skein512_4way_close( void *cc, void *dst );
|
void skein512_4way_close( void *cc, void *dst );
|
||||||
|
|
||||||
@@ -92,6 +100,10 @@ void skein256_4way_init( skein256_4way_context *sc );
|
|||||||
void skein256_4way_update( void *cc, const void *data, size_t len );
|
void skein256_4way_update( void *cc, const void *data, size_t len );
|
||||||
void skein256_4way_close( void *cc, void *dst );
|
void skein256_4way_close( void *cc, void *dst );
|
||||||
|
|
||||||
|
void skein512_4way_prehash64( skein512_4way_context *sc, const void *data );
|
||||||
|
void skein512_4way_final16( skein512_4way_context *sc, void *out,
|
||||||
|
const void *data );
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@@ -5,114 +5,126 @@
|
|||||||
|
|
||||||
#if defined(SKEIN_8WAY)
|
#if defined(SKEIN_8WAY)
|
||||||
|
|
||||||
|
static __thread skein512_8way_context skein512_8way_ctx
|
||||||
|
__attribute__ ((aligned (64)));
|
||||||
|
|
||||||
void skein2hash_8way( void *output, const void *input )
|
void skein2hash_8way( void *output, const void *input )
|
||||||
{
|
{
|
||||||
skein512_8way_context ctx;
|
|
||||||
uint64_t hash[16*8] __attribute__ ((aligned (128)));
|
uint64_t hash[16*8] __attribute__ ((aligned (128)));
|
||||||
|
skein512_8way_context ctx;
|
||||||
|
memcpy( &ctx, &skein512_8way_ctx, sizeof( ctx ) );
|
||||||
|
|
||||||
skein512_8way_init( &ctx );
|
skein512_8way_final16( &ctx, hash, input + (64*8) );
|
||||||
skein512_8way_update( &ctx, input, 80 );
|
skein512_8way_full( &ctx, output, hash, 64 );
|
||||||
skein512_8way_close( &ctx, hash );
|
|
||||||
|
|
||||||
skein512_8way_init( &ctx );
|
|
||||||
skein512_8way_update( &ctx, hash, 64 );
|
|
||||||
skein512_8way_close( &ctx, output );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[16*8] __attribute__ ((aligned (128)));
|
uint64_t hash[8*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &(hash[49]);
|
uint64_t *hashq3 = &(hash[3*8]);
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint64_t targq3 = ((uint64_t*)ptarget)[3];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 8;
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
__m512i *noncev = (__m512i*)vdata + 9;
|
||||||
int thr_id = mythr->id;
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
skein512_8way_context ctx;
|
||||||
|
|
||||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||||
|
*noncev = mm512_intrlv_blend_32(
|
||||||
|
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||||
|
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||||
|
skein512_8way_prehash64( &ctx, vdata );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
skein512_8way_final16( &ctx, hash, vdata + (16*8) );
|
||||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
skein512_8way_full( &ctx, hash, hash, 64 );
|
||||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
|
||||||
|
|
||||||
skein2hash_8way( hash, vdata );
|
|
||||||
|
|
||||||
for ( int lane = 0; lane < 8; lane++ )
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
if ( hash7[ lane<<1 ] <= Htarg )
|
if ( unlikely( hashq3[ lane ] <= targq3 && !bench ) )
|
||||||
{
|
{
|
||||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( valid_hash( lane_hash, ptarget ) && !bench )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
|
m512_const1_64( 0x0000000800000000 ) );
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart );
|
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(SKEIN_4WAY)
|
#elif defined(SKEIN_4WAY)
|
||||||
|
|
||||||
|
static __thread skein512_4way_context skein512_4way_ctx
|
||||||
|
__attribute__ ((aligned (64)));
|
||||||
|
|
||||||
void skein2hash_4way( void *output, const void *input )
|
void skein2hash_4way( void *output, const void *input )
|
||||||
{
|
{
|
||||||
skein512_4way_context ctx;
|
skein512_4way_context ctx;
|
||||||
|
memcpy( &ctx, &skein512_4way_ctx, sizeof( ctx ) );
|
||||||
uint64_t hash[16*4] __attribute__ ((aligned (64)));
|
uint64_t hash[16*4] __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
skein512_4way_init( &ctx );
|
skein512_4way_final16( &ctx, hash, input + (64*4) );
|
||||||
skein512_4way_update( &ctx, input, 80 );
|
skein512_4way_full( &ctx, output, hash, 64 );
|
||||||
skein512_4way_close( &ctx, hash );
|
|
||||||
|
|
||||||
skein512_4way_init( &ctx );
|
|
||||||
skein512_4way_update( &ctx, hash, 64 );
|
|
||||||
skein512_4way_close( &ctx, output );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
uint64_t hash[8*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash7 = &(hash[25]);
|
uint64_t *hash_q3 = &(hash[3*4]);
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint64_t targ_q3 = ((uint64_t*)ptarget)[3];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 4;
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
__m256i *noncev = (__m256i*)vdata + 9;
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
skein512_4way_context ctx;
|
||||||
|
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||||
|
skein512_4way_prehash64( &ctx, vdata );
|
||||||
|
*noncev = mm256_intrlv_blend_32(
|
||||||
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
skein512_4way_final16( &ctx, hash, vdata + (16*4) );
|
||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
skein512_4way_full( &ctx, hash, hash, 64 );
|
||||||
|
|
||||||
skein2hash_4way( hash, vdata );
|
|
||||||
|
|
||||||
for ( int lane = 0; lane < 4; lane++ )
|
for ( int lane = 0; lane < 4; lane++ )
|
||||||
if ( hash7[ lane<<1 ] <= Htarg )
|
if ( hash_q3[ lane ] <= targ_q3 )
|
||||||
{
|
{
|
||||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( valid_hash( lane_hash, ptarget ) && !bench )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
|
m256_const1_64( 0x0000000400000000 ) );
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -120,6 +120,13 @@ void sph_whirlpool(void *cc, const void *data, size_t len);
|
|||||||
*/
|
*/
|
||||||
void sph_whirlpool_close(void *cc, void *dst);
|
void sph_whirlpool_close(void *cc, void *dst);
|
||||||
|
|
||||||
|
/**
 * Convenience wrapper: initialise the context, process one buffer and
 * close, in a single statement. Expands to sph_whirlpool_init(),
 * sph_whirlpool() and sph_whirlpool_close() in that order.
 * The cc argument is evaluated three times; pass an expression
 * without side effects.
 */
#define sph_whirlpool512_full( cc, dst, data, len ) \
   do { \
      sph_whirlpool_init( (cc) ); \
      sph_whirlpool( (cc), (data), (len) ); \
      sph_whirlpool_close( (cc), (dst) ); \
   } while (0)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* WHIRLPOOL-0 uses the same structure than plain WHIRLPOOL.
|
* WHIRLPOOL-0 uses the same structure than plain WHIRLPOOL.
|
||||||
*/
|
*/
|
||||||
|
@@ -279,7 +279,7 @@ int scanhash_c11_8way( struct work *work, uint32_t max_nonce,
|
|||||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
@@ -459,7 +459,7 @@ int scanhash_c11_4way( struct work *work, uint32_t max_nonce,
|
|||||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -221,7 +221,7 @@ int scanhash_timetravel_4way( struct work *work, uint32_t max_nonce,
|
|||||||
&& !opt_benchmark )
|
&& !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !(*restart) );
|
} while ( ( n < max_nonce ) && !(*restart) );
|
||||||
|
@@ -256,7 +256,7 @@ int scanhash_timetravel10_4way( struct work *work,
|
|||||||
&& !opt_benchmark )
|
&& !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !(*restart) );
|
} while ( ( n < max_nonce ) && !(*restart) );
|
||||||
|
@@ -128,7 +128,7 @@ int scanhash_tribus_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( ( n < max_nonce-8 ) && !work_restart[thr_id].restart);
|
} while ( ( n < max_nonce-8 ) && !work_restart[thr_id].restart);
|
||||||
@@ -213,7 +213,7 @@ int scanhash_tribus_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart);
|
} while ( ( n < max_nonce-4 ) && !work_restart[thr_id].restart);
|
||||||
|
@@ -279,7 +279,7 @@ int scanhash_x11_8way( struct work *work, uint32_t max_nonce,
|
|||||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
||||||
@@ -469,7 +469,7 @@ int scanhash_x11_4way( struct work *work, uint32_t max_nonce,
|
|||||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -269,7 +269,7 @@ int scanhash_x11evo_4way( struct work* work, uint32_t max_nonce,
|
|||||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -312,7 +312,7 @@ int scanhash_x11gost_8way( struct work *work, uint32_t max_nonce,
|
|||||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
@@ -498,7 +498,7 @@ int scanhash_x11gost_4way( struct work *work, uint32_t max_nonce,
|
|||||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -170,6 +170,9 @@ void x12_8way_hash( void *state, const void *input )
|
|||||||
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
|
dintrlv_4x128_512( hash0, hash1, hash2, hash3, vhashA );
|
||||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
|
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhashB );
|
||||||
|
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
|
(const BitSequence *) hash0, 512 );
|
||||||
|
memcpy( &ctx.echo, &x12_8way_ctx.echo, sizeof(hashState_echo) );
|
||||||
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||||
(const BitSequence *) hash1, 512 );
|
(const BitSequence *) hash1, 512 );
|
||||||
memcpy( &ctx.echo, &x12_8way_ctx.echo, sizeof(hashState_echo) );
|
memcpy( &ctx.echo, &x12_8way_ctx.echo, sizeof(hashState_echo) );
|
||||||
@@ -263,7 +266,7 @@ int scanhash_x12_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
@@ -363,6 +366,18 @@ void x12_4way_hash( void *state, const void *input )
|
|||||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||||
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
dintrlv_2x128( hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||||
|
(const BitSequence *) hash0, 512 );
|
||||||
|
memcpy( &ctx.echo, &x12_4way_ctx.echo, sizeof(hashState_echo) );
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash1,
|
||||||
|
(const BitSequence *) hash1, 512 );
|
||||||
|
memcpy( &ctx.echo, &x12_4way_ctx.echo, sizeof(hashState_echo) );
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash2,
|
||||||
|
(const BitSequence *) hash2, 512 );
|
||||||
|
memcpy( &ctx.echo, &x12_4way_ctx.echo, sizeof(hashState_echo) );
|
||||||
|
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||||
|
(const BitSequence *) hash3, 512 );
|
||||||
|
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||||
memcpy( &ctx.groestl, &x12_4way_ctx.groestl, sizeof(hashState_groestl) );
|
memcpy( &ctx.groestl, &x12_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 512 );
|
||||||
@@ -431,7 +446,7 @@ int scanhash_x12_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -208,7 +208,7 @@ int scanhash_phi1612_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( ( n < max_nonce-8 ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce-8 ) && !work_restart[thr_id].restart );
|
||||||
@@ -251,8 +251,12 @@ void phi1612_4way_hash( void *state, const void *input )
|
|||||||
memcpy( &ctx, &phi1612_4way_ctx, sizeof(phi1612_4way_ctx) );
|
memcpy( &ctx, &phi1612_4way_ctx, sizeof(phi1612_4way_ctx) );
|
||||||
|
|
||||||
// Skein parallel 4way
|
// Skein parallel 4way
|
||||||
skein512_4way_update( &ctx.skein, input, 80 );
|
|
||||||
skein512_4way_close( &ctx.skein, vhash );
|
// skein 4way is broken for 80 bytes
|
||||||
|
// skein512_4way_update( &ctx.skein, input, 80 );
|
||||||
|
// skein512_4way_close( &ctx.skein, vhash );
|
||||||
|
skein512_4way_prehash64( &ctx.skein, input );
|
||||||
|
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
|
||||||
|
|
||||||
// JH
|
// JH
|
||||||
jh512_4way_update( &ctx.jh, vhash, 64 );
|
jh512_4way_update( &ctx.jh, vhash, 64 );
|
||||||
@@ -344,7 +348,7 @@ int scanhash_phi1612_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -35,8 +35,7 @@ void skunk_8way_hash( void *output, const void *input )
|
|||||||
skunk_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
skunk_8way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||||
memcpy( &ctx, &skunk_8way_ctx, sizeof(skunk_8way_ctx) );
|
memcpy( &ctx, &skunk_8way_ctx, sizeof(skunk_8way_ctx) );
|
||||||
|
|
||||||
skein512_8way_update( &ctx.skein, input, 80 );
|
skein512_8way_final16( &ctx.skein, vhash, input );
|
||||||
skein512_8way_close( &ctx.skein, vhash );
|
|
||||||
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||||
hash7, vhash, 512 );
|
hash7, vhash, 512 );
|
||||||
|
|
||||||
@@ -104,35 +103,35 @@ int scanhash_skunk_8way( struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 8;
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
__m512i *noncev = (__m512i*)vdata + 9;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const int thr_id = mythr->id;
|
||||||
int thr_id = mythr->id;
|
|
||||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( bench ) ptarget[7] = 0x0fff;
|
||||||
((uint32_t*)ptarget)[7] = 0x0cff;
|
|
||||||
|
|
||||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||||
|
skein512_8way_prehash64( &skunk_8way_ctx.skein, vdata );
|
||||||
|
*noncev = mm512_intrlv_blend_32(
|
||||||
|
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||||
|
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
|
||||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
|
||||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
|
||||||
|
|
||||||
skunk_8way_hash( hash, vdata );
|
skunk_8way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
|
||||||
|
|
||||||
for ( int i = 0; i < 8; i++ )
|
for ( int i = 0; i < 8; i++ )
|
||||||
if ( unlikely( (hash+(i<<3))[7] <= Htarg ) )
|
if ( unlikely( valid_hash( hash+(i<<3), ptarget ) && !bench ) )
|
||||||
if ( likely( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark ) )
|
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = bswap_32( n+i );
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
|
m512_const1_64( 0x0000000800000000 ) );
|
||||||
n +=8;
|
n +=8;
|
||||||
} while ( likely( ( n < max_nonce-8 ) && !(*restart) ) );
|
} while ( likely( ( n < last_nonce ) && !( *restart ) ) );
|
||||||
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce;
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -159,17 +158,16 @@ static __thread skunk_4way_ctx_holder skunk_4way_ctx;
|
|||||||
|
|
||||||
void skunk_4way_hash( void *output, const void *input )
|
void skunk_4way_hash( void *output, const void *input )
|
||||||
{
|
{
|
||||||
|
uint64_t vhash[8*4] __attribute__ ((aligned (128)));
|
||||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
|
||||||
|
|
||||||
skunk_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
skunk_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||||
memcpy( &ctx, &skunk_4way_ctx, sizeof(skunk_4way_ctx) );
|
memcpy( &ctx, &skunk_4way_ctx, sizeof(skunk_4way_ctx) );
|
||||||
|
|
||||||
skein512_4way_update( &ctx.skein, input, 80 );
|
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
|
||||||
skein512_4way_close( &ctx.skein, vhash );
|
|
||||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
|
|
||||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*)hash0, 64 );
|
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*)hash0, 64 );
|
||||||
@@ -213,40 +211,40 @@ void skunk_4way_hash( void *output, const void *input )
|
|||||||
int scanhash_skunk_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_skunk_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
uint32_t hash[4*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 4;
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
__m256i *noncev = (__m256i*)vdata + 9;
|
||||||
const uint32_t Htarg = ptarget[7];
|
const int thr_id = mythr->id;
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
|
||||||
volatile uint8_t *restart = &( work_restart[ thr_id ].restart );
|
volatile uint8_t *restart = &( work_restart[ thr_id ].restart );
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( bench ) ptarget[7] = 0x0fff;
|
||||||
((uint32_t*)ptarget)[7] = 0x0cff;
|
|
||||||
|
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||||
|
skein512_4way_prehash64( &skunk_4way_ctx.skein, vdata );
|
||||||
|
*noncev = mm256_intrlv_blend_32(
|
||||||
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
|
||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
|
||||||
|
|
||||||
skunk_4way_hash( hash, vdata );
|
skunk_4way_hash( hash, vdata );
|
||||||
pdata[19] = n;
|
|
||||||
|
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ )
|
||||||
if ( (hash+(i<<3))[7] <= Htarg )
|
if ( unlikely( valid_hash( hash+(i<<3), ptarget ) && !bench ) )
|
||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = bswap_32( n + i );
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
|
m256_const1_64( 0x0000000400000000 ) );
|
||||||
n +=4;
|
n +=4;
|
||||||
} while ( ( n < max_nonce ) && !(*restart) );
|
} while ( likely( ( n < last_nonce ) && !( *restart ) ) );
|
||||||
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -319,7 +319,7 @@ int scanhash_x13_8way( struct work *work, uint32_t max_nonce,
|
|||||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
||||||
@@ -531,7 +531,7 @@ int scanhash_x13_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -321,7 +321,7 @@ int scanhash_x13bcd_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
||||||
@@ -541,7 +541,7 @@ int scanhash_x13bcd_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -246,7 +246,7 @@ int scanhash_x13sm3_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -129,7 +129,7 @@ int scanhash_polytimos_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
|
||||||
|
@@ -108,7 +108,7 @@ int scanhash_veltor_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < max_nonce ) && !(*restart) );
|
} while ( ( n < max_nonce ) && !(*restart) );
|
||||||
|
@@ -324,7 +324,7 @@ int scanhash_x14_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
@@ -534,7 +534,7 @@ int scanhash_x14_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
|
@@ -364,7 +364,7 @@ int scanhash_x15_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash, mythr, i );
|
submit_solution( work, hash, mythr );
|
||||||
}
|
}
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
||||||
@@ -592,7 +592,7 @@ int scanhash_x15_4way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||||
{
|
{
|
||||||
pdata[19] = n+i;
|
pdata[19] = n+i;
|
||||||
submit_lane_solution( work, hash, mythr, i );
|
submit_solution( work, hash, mythr );
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
} while ( ( n < last_nonce ) && !work_restart[thr_id].restart );
|
||||||
|
@@ -30,9 +30,6 @@
|
|||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
|
||||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
|
||||||
|
|
||||||
static void hex_getAlgoString(const uint32_t* prevblock, char *output)
|
static void hex_getAlgoString(const uint32_t* prevblock, char *output)
|
||||||
{
|
{
|
||||||
char *sptr = output;
|
char *sptr = output;
|
||||||
@@ -50,6 +47,7 @@ static void hex_getAlgoString(const uint32_t* prevblock, char *output)
|
|||||||
*sptr = '\0';
|
*sptr = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
union _hex_context_overlay
|
union _hex_context_overlay
|
||||||
{
|
{
|
||||||
#if defined(__AES__)
|
#if defined(__AES__)
|
||||||
@@ -66,7 +64,7 @@ union _hex_context_overlay
|
|||||||
sph_keccak512_context keccak;
|
sph_keccak512_context keccak;
|
||||||
hashState_luffa luffa;
|
hashState_luffa luffa;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
sph_shavite512_context shavite;
|
shavite512_context shavite;
|
||||||
hashState_sd simd;
|
hashState_sd simd;
|
||||||
sph_hamsi512_context hamsi;
|
sph_hamsi512_context hamsi;
|
||||||
sph_fugue512_context fugue;
|
sph_fugue512_context fugue;
|
||||||
@@ -75,18 +73,19 @@ union _hex_context_overlay
|
|||||||
SHA512_CTX sha512;
|
SHA512_CTX sha512;
|
||||||
};
|
};
|
||||||
typedef union _hex_context_overlay hex_context_overlay;
|
typedef union _hex_context_overlay hex_context_overlay;
|
||||||
|
*/
|
||||||
|
|
||||||
static __thread hex_context_overlay hex_ctx;
|
static __thread x16r_context_overlay hex_ctx;
|
||||||
|
|
||||||
void hex_hash( void* output, const void* input )
|
int hex_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash[16];
|
uint32_t _ALIGN(128) hash[16];
|
||||||
hex_context_overlay ctx;
|
x16r_context_overlay ctx;
|
||||||
memcpy( &ctx, &hex_ctx, sizeof(ctx) );
|
memcpy( &ctx, &hex_ctx, sizeof(ctx) );
|
||||||
void *in = (void*) input;
|
void *in = (void*) input;
|
||||||
int size = 80;
|
int size = 80;
|
||||||
|
|
||||||
char elem = hashOrder[0];
|
char elem = x16r_hash_order[0];
|
||||||
uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
for ( int i = 0; i < 16; i++ )
|
for ( int i = 0; i < 16; i++ )
|
||||||
@@ -160,9 +159,7 @@ void hex_hash( void* output, const void* input )
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SHAVITE:
|
case SHAVITE:
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash, in, size );
|
||||||
sph_shavite512( &ctx.shavite, in, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash );
|
|
||||||
break;
|
break;
|
||||||
case SIMD:
|
case SIMD:
|
||||||
init_sd( &ctx.simd, 512 );
|
init_sd( &ctx.simd, 512 );
|
||||||
@@ -190,9 +187,7 @@ void hex_hash( void* output, const void* input )
|
|||||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash, in, size );
|
||||||
sph_fugue512( &ctx.fugue, in, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash );
|
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
@@ -206,13 +201,12 @@ void hex_hash( void* output, const void* input )
|
|||||||
break;
|
break;
|
||||||
case WHIRLPOOL:
|
case WHIRLPOOL:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
|
||||||
sph_whirlpool( &ctx.whirlpool, in, size );
|
|
||||||
}
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
sph_whirlpool512_full( &ctx.whirlpool, hash, in, size );
|
||||||
break;
|
break;
|
||||||
case SHA_512:
|
case SHA_512:
|
||||||
SHA512_Init( &ctx.sha512 );
|
SHA512_Init( &ctx.sha512 );
|
||||||
@@ -220,11 +214,15 @@ void hex_hash( void* output, const void* input )
|
|||||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
algo = (uint8_t)hash[0] % X16R_HASH_FUNC_COUNT;
|
algo = (uint8_t)hash[0] % X16R_HASH_FUNC_COUNT;
|
||||||
in = (void*) hash;
|
in = (void*) hash;
|
||||||
size = 64;
|
size = 64;
|
||||||
}
|
}
|
||||||
memcpy(output, hash, 32);
|
memcpy(output, hash, 32);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_hex( struct work *work, uint32_t max_nonce,
|
int scanhash_hex( struct work *work, uint32_t max_nonce,
|
||||||
@@ -235,7 +233,7 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const uint32_t last_nonce = max_nonce - 4;
|
const uint32_t last_nonce = max_nonce;
|
||||||
const int thr_id = mythr->id;
|
const int thr_id = mythr->id;
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
@@ -244,17 +242,18 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
mm128_bswap32_80( edata, pdata );
|
mm128_bswap32_80( edata, pdata );
|
||||||
|
|
||||||
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
uint32_t ntime = swab32(pdata[17]);
|
uint32_t ntime = swab32(pdata[17]);
|
||||||
if ( s_ntime != ntime )
|
if ( s_ntime != ntime )
|
||||||
{
|
{
|
||||||
hex_getAlgoString( (const uint32_t*) (&edata[1]), hashOrder );
|
hex_getAlgoString( (const uint32_t*) (&edata[1]), x16r_hash_order );
|
||||||
s_ntime = ntime;
|
s_ntime = ntime;
|
||||||
if ( opt_debug && !thr_id )
|
if ( opt_debug && !thr_id )
|
||||||
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
|
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||||
const char elem = hashOrder[0];
|
const char elem = x16r_hash_order[0];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
{
|
{
|
||||||
@@ -291,8 +290,7 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
|
|||||||
do
|
do
|
||||||
{
|
{
|
||||||
edata[19] = nonce;
|
edata[19] = nonce;
|
||||||
hex_hash( hash32, edata );
|
if ( hex_hash( hash32, edata, thr_id ) );
|
||||||
|
|
||||||
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
be32enc( &pdata[19], nonce );
|
be32enc( &pdata[19], nonce );
|
||||||
|
258
algo/x16/minotaur.c
Normal file
258
algo/x16/minotaur.c
Normal file
@@ -0,0 +1,258 @@
|
|||||||
|
// Minotaur hash
|
||||||
|
|
||||||
|
#include "algo-gate-api.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "algo/blake/sph_blake.h"
|
||||||
|
#include "algo/bmw/sph_bmw.h"
|
||||||
|
#include "algo/jh/sph_jh.h"
|
||||||
|
#include "algo/keccak/sph_keccak.h"
|
||||||
|
#include "algo/skein/sph_skein.h"
|
||||||
|
#include "algo/shavite/sph_shavite.h"
|
||||||
|
#include "algo/luffa/luffa_for_sse2.h"
|
||||||
|
#include "algo/cubehash/cubehash_sse2.h"
|
||||||
|
#include "algo/simd/nist.h"
|
||||||
|
#include "algo/hamsi/sph_hamsi.h"
|
||||||
|
#include "algo/fugue/sph_fugue.h"
|
||||||
|
#include "algo/shabal/sph_shabal.h"
|
||||||
|
#include "algo/whirlpool/sph_whirlpool.h"
|
||||||
|
#include <openssl/sha.h>
|
||||||
|
#if defined(__AES__)
|
||||||
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
|
#else
|
||||||
|
#include "algo/echo/sph_echo.h"
|
||||||
|
#include "algo/groestl/sph_groestl.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Config
|
||||||
|
#define MINOTAUR_ALGO_COUNT 16
|
||||||
|
|
||||||
|
typedef struct TortureNode TortureNode;
|
||||||
|
typedef struct TortureGarden TortureGarden;
|
||||||
|
|
||||||
|
// Graph of hash algos plus SPH contexts
|
||||||
|
struct TortureGarden {
|
||||||
|
#if defined(__AES__)
|
||||||
|
hashState_echo echo;
|
||||||
|
hashState_groestl groestl;
|
||||||
|
#else
|
||||||
|
sph_echo512_context echo;
|
||||||
|
sph_groestl512_context groestl;
|
||||||
|
#endif
|
||||||
|
sph_blake512_context blake;
|
||||||
|
sph_bmw512_context bmw;
|
||||||
|
sph_skein512_context skein;
|
||||||
|
sph_jh512_context jh;
|
||||||
|
sph_keccak512_context keccak;
|
||||||
|
hashState_luffa luffa;
|
||||||
|
cubehashParam cube;
|
||||||
|
shavite512_context shavite;
|
||||||
|
hashState_sd simd;
|
||||||
|
sph_hamsi512_context hamsi;
|
||||||
|
sph_fugue512_context fugue;
|
||||||
|
sph_shabal512_context shabal;
|
||||||
|
sph_whirlpool_context whirlpool;
|
||||||
|
SHA512_CTX sha512;
|
||||||
|
|
||||||
|
struct TortureNode {
|
||||||
|
unsigned int algo;
|
||||||
|
TortureNode *childLeft;
|
||||||
|
TortureNode *childRight;
|
||||||
|
} nodes[22];
|
||||||
|
} __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
// Get a 64-byte hash for given 64-byte input, using given TortureGarden contexts and given algo index
|
||||||
|
static void get_hash( void *output, const void *input, TortureGarden *garden,
|
||||||
|
unsigned int algo )
|
||||||
|
{
|
||||||
|
unsigned char hash[64] __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
switch (algo) {
|
||||||
|
case 0:
|
||||||
|
sph_blake512_init(&garden->blake);
|
||||||
|
sph_blake512(&garden->blake, input, 64);
|
||||||
|
sph_blake512_close(&garden->blake, hash);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
sph_bmw512_init(&garden->bmw);
|
||||||
|
sph_bmw512(&garden->bmw, input, 64);
|
||||||
|
sph_bmw512_close(&garden->bmw, hash);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
cubehashInit( &garden->cube, 512, 16, 32 );
|
||||||
|
cubehashUpdateDigest( &garden->cube, (byte*)hash,
|
||||||
|
(const byte*)input, 64 );
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
#if defined(__AES__)
|
||||||
|
echo_full( &garden->echo, (BitSequence *)hash, 512,
|
||||||
|
(const BitSequence *)input, 64 );
|
||||||
|
#else
|
||||||
|
sph_echo512_init(&garden->echo);
|
||||||
|
sph_echo512(&garden->echo, input, 64);
|
||||||
|
sph_echo512_close(&garden->echo, hash);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
sph_fugue512_init(&garden->fugue);
|
||||||
|
sph_fugue512(&garden->fugue, input, 64);
|
||||||
|
sph_fugue512_close(&garden->fugue, hash);
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
#if defined(__AES__)
|
||||||
|
groestl512_full( &garden->groestl, (char*)hash, (char*)input, 512 );
|
||||||
|
#else
|
||||||
|
sph_groestl512_init(&garden->groestl);
|
||||||
|
sph_groestl512(&garden->groestl, input, 64);
|
||||||
|
sph_groestl512_close(&garden->groestl, hash);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case 6:
|
||||||
|
sph_hamsi512_init(&garden->hamsi);
|
||||||
|
sph_hamsi512(&garden->hamsi, input, 64);
|
||||||
|
sph_hamsi512_close(&garden->hamsi, hash);
|
||||||
|
break;
|
||||||
|
case 7:
|
||||||
|
SHA512_Init( &garden->sha512 );
|
||||||
|
SHA512_Update( &garden->sha512, input, 64 );
|
||||||
|
SHA512_Final( (unsigned char*)hash, &garden->sha512 );
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
sph_jh512_init(&garden->jh);
|
||||||
|
sph_jh512(&garden->jh, input, 64);
|
||||||
|
sph_jh512_close(&garden->jh, hash);
|
||||||
|
break;
|
||||||
|
case 9:
|
||||||
|
sph_keccak512_init(&garden->keccak);
|
||||||
|
sph_keccak512(&garden->keccak, input, 64);
|
||||||
|
sph_keccak512_close(&garden->keccak, hash);
|
||||||
|
break;
|
||||||
|
case 10:
|
||||||
|
init_luffa( &garden->luffa, 512 );
|
||||||
|
update_and_final_luffa( &garden->luffa, (BitSequence*)hash,
|
||||||
|
(const BitSequence*)input, 64 );
|
||||||
|
break;
|
||||||
|
case 11:
|
||||||
|
sph_shabal512_init(&garden->shabal);
|
||||||
|
sph_shabal512(&garden->shabal, input, 64);
|
||||||
|
sph_shabal512_close(&garden->shabal, hash);
|
||||||
|
break;
|
||||||
|
case 12:
|
||||||
|
sph_shavite512_init(&garden->shavite);
|
||||||
|
sph_shavite512(&garden->shavite, input, 64);
|
||||||
|
sph_shavite512_close(&garden->shavite, hash);
|
||||||
|
break;
|
||||||
|
case 13:
|
||||||
|
init_sd( &garden->simd, 512 );
|
||||||
|
update_final_sd( &garden->simd, (BitSequence *)hash,
|
||||||
|
(const BitSequence*)input, 512 );
|
||||||
|
break;
|
||||||
|
case 14:
|
||||||
|
sph_skein512_init(&garden->skein);
|
||||||
|
sph_skein512(&garden->skein, input, 64);
|
||||||
|
sph_skein512_close(&garden->skein, hash);
|
||||||
|
break;
|
||||||
|
case 15:
|
||||||
|
sph_whirlpool_init(&garden->whirlpool);
|
||||||
|
sph_whirlpool(&garden->whirlpool, input, 64);
|
||||||
|
sph_whirlpool_close(&garden->whirlpool, hash);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output the hash
|
||||||
|
memcpy(output, hash, 64);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursively traverse a given torture garden starting with a given hash and given node within the garden. The hash is overwritten with the final hash.
|
||||||
|
static void traverse_garden( TortureGarden *garden, void *hash,
|
||||||
|
TortureNode *node )
|
||||||
|
{
|
||||||
|
unsigned char partialHash[64] __attribute__ ((aligned (64)));
|
||||||
|
get_hash(partialHash, hash, garden, node->algo);
|
||||||
|
|
||||||
|
if ( partialHash[63] % 2 == 0 )
|
||||||
|
{ // Last byte of output hash is even
|
||||||
|
if ( node->childLeft != NULL )
|
||||||
|
traverse_garden( garden, partialHash, node->childLeft );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ // Last byte of output hash is odd
|
||||||
|
if ( node->childRight != NULL )
|
||||||
|
traverse_garden( garden, partialHash, node->childRight );
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy( hash, partialHash, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Associate child nodes with a parent node
|
||||||
|
static inline void link_nodes( TortureNode *parent, TortureNode *childLeft,
|
||||||
|
TortureNode *childRight )
|
||||||
|
{
|
||||||
|
parent->childLeft = childLeft;
|
||||||
|
parent->childRight = childRight;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __thread TortureGarden garden;
|
||||||
|
|
||||||
|
bool initialize_torture_garden()
|
||||||
|
{
|
||||||
|
// Create torture garden nodes. Note that both sides of 19 and 20 lead to 21, and 21 has no children (to make traversal complete).
|
||||||
|
link_nodes(&garden.nodes[0], &garden.nodes[1], &garden.nodes[2]);
|
||||||
|
link_nodes(&garden.nodes[1], &garden.nodes[3], &garden.nodes[4]);
|
||||||
|
link_nodes(&garden.nodes[2], &garden.nodes[5], &garden.nodes[6]);
|
||||||
|
link_nodes(&garden.nodes[3], &garden.nodes[7], &garden.nodes[8]);
|
||||||
|
link_nodes(&garden.nodes[4], &garden.nodes[9], &garden.nodes[10]);
|
||||||
|
link_nodes(&garden.nodes[5], &garden.nodes[11], &garden.nodes[12]);
|
||||||
|
link_nodes(&garden.nodes[6], &garden.nodes[13], &garden.nodes[14]);
|
||||||
|
link_nodes(&garden.nodes[7], &garden.nodes[15], &garden.nodes[16]);
|
||||||
|
link_nodes(&garden.nodes[8], &garden.nodes[15], &garden.nodes[16]);
|
||||||
|
link_nodes(&garden.nodes[9], &garden.nodes[15], &garden.nodes[16]);
|
||||||
|
link_nodes(&garden.nodes[10], &garden.nodes[15], &garden.nodes[16]);
|
||||||
|
link_nodes(&garden.nodes[11], &garden.nodes[17], &garden.nodes[18]);
|
||||||
|
link_nodes(&garden.nodes[12], &garden.nodes[17], &garden.nodes[18]);
|
||||||
|
link_nodes(&garden.nodes[13], &garden.nodes[17], &garden.nodes[18]);
|
||||||
|
link_nodes(&garden.nodes[14], &garden.nodes[17], &garden.nodes[18]);
|
||||||
|
link_nodes(&garden.nodes[15], &garden.nodes[19], &garden.nodes[20]);
|
||||||
|
link_nodes(&garden.nodes[16], &garden.nodes[19], &garden.nodes[20]);
|
||||||
|
link_nodes(&garden.nodes[17], &garden.nodes[19], &garden.nodes[20]);
|
||||||
|
link_nodes(&garden.nodes[18], &garden.nodes[19], &garden.nodes[20]);
|
||||||
|
link_nodes(&garden.nodes[19], &garden.nodes[21], &garden.nodes[21]);
|
||||||
|
link_nodes(&garden.nodes[20], &garden.nodes[21], &garden.nodes[21]);
|
||||||
|
garden.nodes[21].childLeft = NULL;
|
||||||
|
garden.nodes[21].childRight = NULL;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Produce a 32-byte hash from 80-byte input data
|
||||||
|
int minotaur_hash( void *output, const void *input )
|
||||||
|
{
|
||||||
|
unsigned char hash[64] __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
// Find initial sha512 hash
|
||||||
|
SHA512_Init( &garden.sha512 );
|
||||||
|
SHA512_Update( &garden.sha512, input, 80 );
|
||||||
|
SHA512_Final( (unsigned char*) hash, &garden.sha512 );
|
||||||
|
|
||||||
|
// Assign algos to torture garden nodes based on initial hash
|
||||||
|
for ( int i = 0; i < 22; i++ )
|
||||||
|
garden.nodes[i].algo = hash[i] % MINOTAUR_ALGO_COUNT;
|
||||||
|
|
||||||
|
// Send the initial hash through the torture garden
|
||||||
|
traverse_garden( &garden, hash, &garden.nodes[0] );
|
||||||
|
|
||||||
|
memcpy( output, hash, 32 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill in the algo gate for Minotaur: the hash function, the supported
// optimization flags and the miner_thread_init hook. Always returns true.
bool register_minotaur_algo( algo_gate_t* gate )
{
    // NOTE(review): the void* casts bypass prototype checking; confirm that
    // minotaur_hash matches the signature the gate's `hash` member expects.
    gate->hash              = (void*)&minotaur_hash;
    gate->optimizations     = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
    gate->miner_thread_init = (void*)&initialize_torture_garden;
    return true;
}
|
||||||
|
|
@@ -80,7 +80,7 @@ void x16r_8way_prehash( void *vdata, void *pdata )
|
|||||||
// Called by wrapper hash function to optionally continue hashing and
|
// Called by wrapper hash function to optionally continue hashing and
|
||||||
// convert to final hash.
|
// convert to final hash.
|
||||||
|
|
||||||
void x16r_8way_hash_generic( void* output, const void* input )
|
int x16r_8way_hash_generic( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t vhash[20*8] __attribute__ ((aligned (128)));
|
uint32_t vhash[20*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t hash0[20] __attribute__ ((aligned (64)));
|
uint32_t hash0[20] __attribute__ ((aligned (64)));
|
||||||
@@ -287,30 +287,14 @@ void x16r_8way_hash_generic( void* output, const void* input )
|
|||||||
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
|
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
|
||||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||||
#else
|
#else
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||||
sph_shavite512( &ctx.shavite, in0, size );
|
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
shavite512_full( &ctx.shavite, hash2, in2, size );
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash3, in3, size );
|
||||||
sph_shavite512( &ctx.shavite, in1, size );
|
shavite512_full( &ctx.shavite, hash4, in4, size );
|
||||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
shavite512_full( &ctx.shavite, hash5, in5, size );
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash6, in6, size );
|
||||||
sph_shavite512( &ctx.shavite, in2, size );
|
shavite512_full( &ctx.shavite, hash7, in7, size );
|
||||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in3, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in4, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash4 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in5, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash5 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in6, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash6 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in7, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case SIMD:
|
case SIMD:
|
||||||
@@ -363,30 +347,14 @@ void x16r_8way_hash_generic( void* output, const void* input )
|
|||||||
hash7, vhash );
|
hash7, vhash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
|
||||||
sph_fugue512( &ctx.fugue, in0, size );
|
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
|
||||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
|
||||||
sph_fugue512( &ctx.fugue, in1, size );
|
sph_fugue512_full( &ctx.fugue, hash4, in4, size );
|
||||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
sph_fugue512_full( &ctx.fugue, hash5, in5, size );
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash6, in6, size );
|
||||||
sph_fugue512( &ctx.fugue, in2, size );
|
sph_fugue512_full( &ctx.fugue, hash7, in7, size );
|
||||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in3, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in4, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash4 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in5, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash5 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in6, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash6 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in7, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash7 );
|
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||||
@@ -431,30 +399,14 @@ void x16r_8way_hash_generic( void* output, const void* input )
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
|
||||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
|
||||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
sph_whirlpool512_full( &ctx.whirlpool, hash4, in4, size );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
sph_whirlpool512_full( &ctx.whirlpool, hash5, in5, size );
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool512_full( &ctx.whirlpool, hash6, in6, size );
|
||||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
sph_whirlpool512_full( &ctx.whirlpool, hash7, in7, size );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in4, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash4 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in5, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash5 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in6, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash6 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in7, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SHA_512:
|
case SHA_512:
|
||||||
@@ -472,6 +424,9 @@ void x16r_8way_hash_generic( void* output, const void* input )
|
|||||||
hash7, vhash );
|
hash7, vhash );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
size = 64;
|
size = 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -483,14 +438,17 @@ void x16r_8way_hash_generic( void* output, const void* input )
|
|||||||
memcpy( output+320, hash5, 64 );
|
memcpy( output+320, hash5, 64 );
|
||||||
memcpy( output+384, hash6, 64 );
|
memcpy( output+384, hash6, 64 );
|
||||||
memcpy( output+448, hash7, 64 );
|
memcpy( output+448, hash7, 64 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// x16-r,-s,-rt wrapper called directly by scanhash to repackage 512 bit
|
// x16-r,-s,-rt wrapper called directly by scanhash to repackage 512 bit
|
||||||
// hash to 256 bit final hash.
|
// hash to 256 bit final hash.
|
||||||
void x16r_8way_hash( void* output, const void* input )
|
int x16r_8way_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint8_t hash[64*8] __attribute__ ((aligned (128)));
|
uint8_t hash[64*8] __attribute__ ((aligned (128)));
|
||||||
x16r_8way_hash_generic( hash, input );
|
if ( !x16r_8way_hash_generic( hash, input, thrid ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
memcpy( output, hash, 32 );
|
memcpy( output, hash, 32 );
|
||||||
memcpy( output+32, hash+64, 32 );
|
memcpy( output+32, hash+64, 32 );
|
||||||
@@ -500,6 +458,8 @@ void x16r_8way_hash( void* output, const void* input )
|
|||||||
memcpy( output+160, hash+320, 32 );
|
memcpy( output+160, hash+320, 32 );
|
||||||
memcpy( output+192, hash+384, 32 );
|
memcpy( output+192, hash+384, 32 );
|
||||||
memcpy( output+224, hash+448, 32 );
|
memcpy( output+224, hash+448, 32 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// x16r only
|
// x16r only
|
||||||
@@ -540,13 +500,12 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
|
|||||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
x16r_8way_hash( hash, vdata );
|
if( x16r_8way_hash( hash, vdata, thr_id ) );
|
||||||
|
|
||||||
for ( int i = 0; i < 8; i++ )
|
for ( int i = 0; i < 8; i++ )
|
||||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n+i );
|
pdata[19] = bswap_32( n+i );
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev,
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
m512_const1_64( 0x0000000800000000 ) );
|
m512_const1_64( 0x0000000800000000 ) );
|
||||||
@@ -576,8 +535,7 @@ void x16r_4way_prehash( void *vdata, void *pdata )
|
|||||||
break;
|
break;
|
||||||
case SKEIN:
|
case SKEIN:
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||||
skein512_4way_init( &x16r_ctx.skein );
|
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
|
||||||
skein512_4way_update( &x16r_ctx.skein, vdata, 64 );
|
|
||||||
break;
|
break;
|
||||||
case LUFFA:
|
case LUFFA:
|
||||||
mm128_bswap32_80( edata, pdata );
|
mm128_bswap32_80( edata, pdata );
|
||||||
@@ -614,7 +572,7 @@ void x16r_4way_prehash( void *vdata, void *pdata )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void x16r_4way_hash_generic( void* output, const void* input )
|
int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t vhash[20*4] __attribute__ ((aligned (128)));
|
uint32_t vhash[20*4] __attribute__ ((aligned (128)));
|
||||||
uint32_t hash0[20] __attribute__ ((aligned (64)));
|
uint32_t hash0[20] __attribute__ ((aligned (64)));
|
||||||
@@ -692,14 +650,12 @@ void x16r_4way_hash_generic( void* output, const void* input )
|
|||||||
break;
|
break;
|
||||||
case SKEIN:
|
case SKEIN:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
|
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
skein512_4way_init( &ctx.skein );
|
skein512_4way_full( &ctx.skein, vhash, vhash, size );
|
||||||
skein512_4way_update( &ctx.skein, vhash, size );
|
|
||||||
}
|
}
|
||||||
skein512_4way_close( &ctx.skein, vhash );
|
|
||||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||||
break;
|
break;
|
||||||
case LUFFA:
|
case LUFFA:
|
||||||
@@ -755,18 +711,10 @@ void x16r_4way_hash_generic( void* output, const void* input )
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SHAVITE:
|
case SHAVITE:
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||||
sph_shavite512( &ctx.shavite, in0, size );
|
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
shavite512_full( &ctx.shavite, hash2, in2, size );
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash3, in3, size );
|
||||||
sph_shavite512( &ctx.shavite, in1, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in2, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in3, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
|
||||||
break;
|
break;
|
||||||
case SIMD:
|
case SIMD:
|
||||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||||
@@ -799,18 +747,10 @@ void x16r_4way_hash_generic( void* output, const void* input )
|
|||||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
|
||||||
sph_fugue512( &ctx.fugue, in0, size );
|
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
|
||||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
|
||||||
sph_fugue512( &ctx.fugue, in1, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in2, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in3, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
@@ -841,18 +781,10 @@ void x16r_4way_hash_generic( void* output, const void* input )
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
|
||||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
|
||||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SHA_512:
|
case SHA_512:
|
||||||
@@ -869,23 +801,31 @@ void x16r_4way_hash_generic( void* output, const void* input )
|
|||||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
size = 64;
|
size = 64;
|
||||||
}
|
}
|
||||||
memcpy( output, hash0, 64 );
|
memcpy( output, hash0, 64 );
|
||||||
memcpy( output+64, hash1, 64 );
|
memcpy( output+64, hash1, 64 );
|
||||||
memcpy( output+128, hash2, 64 );
|
memcpy( output+128, hash2, 64 );
|
||||||
memcpy( output+192, hash3, 64 );
|
memcpy( output+192, hash3, 64 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void x16r_4way_hash( void* output, const void* input )
|
int x16r_4way_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint8_t hash[64*4] __attribute__ ((aligned (64)));
|
uint8_t hash[64*4] __attribute__ ((aligned (64)));
|
||||||
x16r_4way_hash_generic( hash, input );
|
if ( !x16r_4way_hash_generic( hash, input, thrid ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
memcpy( output, hash, 32 );
|
memcpy( output, hash, 32 );
|
||||||
memcpy( output+32, hash+64, 32 );
|
memcpy( output+32, hash+64, 32 );
|
||||||
memcpy( output+64, hash+128, 32 );
|
memcpy( output+64, hash+128, 32 );
|
||||||
memcpy( output+96, hash+192, 32 );
|
memcpy( output+96, hash+192, 32 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
||||||
@@ -924,12 +864,12 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
|||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
x16r_4way_hash( hash, vdata );
|
if ( x16r_4way_hash( hash, vdata, thr_id ) );
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ )
|
||||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n+i );
|
pdata[19] = bswap_32( n+i );
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev,
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
m256_const1_64( 0x0000000400000000 ) );
|
m256_const1_64( 0x0000000400000000 ) );
|
||||||
|
@@ -121,7 +121,7 @@ union _x16r_8way_context_overlay
|
|||||||
echo_4way_context echo;
|
echo_4way_context echo;
|
||||||
#else
|
#else
|
||||||
hashState_groestl groestl;
|
hashState_groestl groestl;
|
||||||
sph_shavite512_context shavite;
|
shavite512_context shavite;
|
||||||
hashState_echo echo;
|
hashState_echo echo;
|
||||||
#endif
|
#endif
|
||||||
} __attribute__ ((aligned (64)));
|
} __attribute__ ((aligned (64)));
|
||||||
@@ -131,8 +131,8 @@ typedef union _x16r_8way_context_overlay x16r_8way_context_overlay;
|
|||||||
extern __thread x16r_8way_context_overlay x16r_ctx;
|
extern __thread x16r_8way_context_overlay x16r_ctx;
|
||||||
|
|
||||||
void x16r_8way_prehash( void *, void * );
|
void x16r_8way_prehash( void *, void * );
|
||||||
void x16r_8way_hash_generic( void *, const void * );
|
int x16r_8way_hash_generic( void *, const void *, int );
|
||||||
void x16r_8way_hash( void *, const void * );
|
int x16r_8way_hash( void *, const void *, int );
|
||||||
int scanhash_x16r_8way( struct work *, uint32_t ,
|
int scanhash_x16r_8way( struct work *, uint32_t ,
|
||||||
uint64_t *, struct thr_info * );
|
uint64_t *, struct thr_info * );
|
||||||
extern __thread x16r_8way_context_overlay x16r_ctx;
|
extern __thread x16r_8way_context_overlay x16r_ctx;
|
||||||
@@ -152,7 +152,7 @@ union _x16r_4way_context_overlay
|
|||||||
luffa_2way_context luffa;
|
luffa_2way_context luffa;
|
||||||
hashState_luffa luffa1;
|
hashState_luffa luffa1;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
sph_shavite512_context shavite;
|
shavite512_context shavite;
|
||||||
simd_2way_context simd;
|
simd_2way_context simd;
|
||||||
hamsi512_4way_context hamsi;
|
hamsi512_4way_context hamsi;
|
||||||
sph_fugue512_context fugue;
|
sph_fugue512_context fugue;
|
||||||
@@ -166,8 +166,8 @@ typedef union _x16r_4way_context_overlay x16r_4way_context_overlay;
|
|||||||
extern __thread x16r_4way_context_overlay x16r_ctx;
|
extern __thread x16r_4way_context_overlay x16r_ctx;
|
||||||
|
|
||||||
void x16r_4way_prehash( void *, void * );
|
void x16r_4way_prehash( void *, void * );
|
||||||
void x16r_4way_hash_generic( void *, const void * );
|
int x16r_4way_hash_generic( void *, const void *, int );
|
||||||
void x16r_4way_hash( void *, const void * );
|
int x16r_4way_hash( void *, const void *, int );
|
||||||
int scanhash_x16r_4way( struct work *, uint32_t,
|
int scanhash_x16r_4way( struct work *, uint32_t,
|
||||||
uint64_t *, struct thr_info * );
|
uint64_t *, struct thr_info * );
|
||||||
extern __thread x16r_4way_context_overlay x16r_ctx;
|
extern __thread x16r_4way_context_overlay x16r_ctx;
|
||||||
@@ -191,7 +191,7 @@ union _x16r_context_overlay
|
|||||||
sph_keccak512_context keccak;
|
sph_keccak512_context keccak;
|
||||||
hashState_luffa luffa;
|
hashState_luffa luffa;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
sph_shavite512_context shavite;
|
shavite512_context shavite;
|
||||||
hashState_sd simd;
|
hashState_sd simd;
|
||||||
sph_hamsi512_context hamsi;
|
sph_hamsi512_context hamsi;
|
||||||
sph_fugue512_context fugue;
|
sph_fugue512_context fugue;
|
||||||
@@ -205,26 +205,26 @@ typedef union _x16r_context_overlay x16r_context_overlay;
|
|||||||
extern __thread x16r_context_overlay x16_ctx;
|
extern __thread x16r_context_overlay x16_ctx;
|
||||||
|
|
||||||
void x16r_prehash( void *, void * );
|
void x16r_prehash( void *, void * );
|
||||||
void x16r_hash_generic( void *, const void * );
|
int x16r_hash_generic( void *, const void *, int );
|
||||||
void x16r_hash( void *, const void * );
|
int x16r_hash( void *, const void *, int );
|
||||||
int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * );
|
int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * );
|
||||||
|
|
||||||
// x16Rv2
|
// x16Rv2
|
||||||
#if defined(X16RV2_8WAY)
|
#if defined(X16RV2_8WAY)
|
||||||
|
|
||||||
void x16rv2_8way_hash( void *state, const void *input );
|
int x16rv2_8way_hash( void *state, const void *input, int thrid );
|
||||||
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#elif defined(X16RV2_4WAY)
|
#elif defined(X16RV2_4WAY)
|
||||||
|
|
||||||
void x16rv2_4way_hash( void *state, const void *input );
|
int x16rv2_4way_hash( void *state, const void *input, int thrid );
|
||||||
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void x16rv2_hash( void *state, const void *input );
|
int x16rv2_hash( void *state, const void *input, int thr_id );
|
||||||
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
@@ -254,21 +254,21 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
|||||||
// x21s
|
// x21s
|
||||||
#if defined(X16R_8WAY)
|
#if defined(X16R_8WAY)
|
||||||
|
|
||||||
void x21s_8way_hash( void *state, const void *input );
|
int x21s_8way_hash( void *state, const void *input, int thrid );
|
||||||
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
bool x21s_8way_thread_init();
|
bool x21s_8way_thread_init();
|
||||||
|
|
||||||
#elif defined(X16R_4WAY)
|
#elif defined(X16R_4WAY)
|
||||||
|
|
||||||
void x21s_4way_hash( void *state, const void *input );
|
int x21s_4way_hash( void *state, const void *input, int thrid );
|
||||||
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
bool x21s_4way_thread_init();
|
bool x21s_4way_thread_init();
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void x21s_hash( void *state, const void *input );
|
int x21s_hash( void *state, const void *input, int thr_id );
|
||||||
int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
bool x21s_thread_init();
|
bool x21s_thread_init();
|
||||||
|
@@ -48,7 +48,7 @@ void x16r_prehash( void *edata, void *pdata )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void x16r_hash_generic( void* output, const void* input )
|
int x16r_hash_generic( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash[16];
|
uint32_t _ALIGN(128) hash[16];
|
||||||
x16r_context_overlay ctx;
|
x16r_context_overlay ctx;
|
||||||
@@ -124,9 +124,7 @@ void x16r_hash_generic( void* output, const void* input )
|
|||||||
(byte*)in, size );
|
(byte*)in, size );
|
||||||
break;
|
break;
|
||||||
case SHAVITE:
|
case SHAVITE:
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash, in, size );
|
||||||
sph_shavite512( &ctx.shavite, in, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash );
|
|
||||||
break;
|
break;
|
||||||
case SIMD:
|
case SIMD:
|
||||||
simd_full( &ctx.simd, (BitSequence *)hash,
|
simd_full( &ctx.simd, (BitSequence *)hash,
|
||||||
@@ -153,9 +151,7 @@ void x16r_hash_generic( void* output, const void* input )
|
|||||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash, in, size );
|
||||||
sph_fugue512( &ctx.fugue, in, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash );
|
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
@@ -169,13 +165,12 @@ void x16r_hash_generic( void* output, const void* input )
|
|||||||
break;
|
break;
|
||||||
case WHIRLPOOL:
|
case WHIRLPOOL:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool( &ctx.whirlpool, in+64, 16 );
|
||||||
sph_whirlpool( &ctx.whirlpool, in, size );
|
|
||||||
}
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
sph_whirlpool_close( &ctx.whirlpool, hash );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
sph_whirlpool512_full( &ctx.whirlpool, hash, in, size );
|
||||||
break;
|
break;
|
||||||
case SHA_512:
|
case SHA_512:
|
||||||
SHA512_Init( &ctx.sha512 );
|
SHA512_Init( &ctx.sha512 );
|
||||||
@@ -183,18 +178,24 @@ void x16r_hash_generic( void* output, const void* input )
|
|||||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
in = (void*) hash;
|
in = (void*) hash;
|
||||||
size = 64;
|
size = 64;
|
||||||
}
|
}
|
||||||
memcpy( output, hash, 64 );
|
memcpy( output, hash, 64 );
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void x16r_hash( void* output, const void* input )
|
int x16r_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint8_t hash[64] __attribute__ ((aligned (64)));
|
uint8_t hash[64] __attribute__ ((aligned (64)));
|
||||||
x16r_hash_generic( hash, input );
|
if ( !x16r_hash_generic( hash, input, thrid ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
memcpy( output, hash, 32 );
|
memcpy( output, hash, 32 );
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
||||||
@@ -228,8 +229,7 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
|||||||
do
|
do
|
||||||
{
|
{
|
||||||
edata[19] = nonce;
|
edata[19] = nonce;
|
||||||
x16r_hash( hash32, edata );
|
if ( x16r_hash( hash32, edata, thr_id ) )
|
||||||
|
|
||||||
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( nonce );
|
pdata[19] = bswap_32( nonce );
|
||||||
@@ -238,7 +238,7 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
|||||||
nonce++;
|
nonce++;
|
||||||
} while ( nonce < max_nonce && !(*restart) );
|
} while ( nonce < max_nonce && !(*restart) );
|
||||||
pdata[19] = nonce;
|
pdata[19] = nonce;
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -41,13 +41,12 @@ int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
|||||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
x16r_8way_hash( hash, vdata );
|
if ( x16r_8way_hash( hash, vdata, thr_id ) )
|
||||||
|
|
||||||
for ( int i = 0; i < 8; i++ )
|
for ( int i = 0; i < 8; i++ )
|
||||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n+i );
|
pdata[19] = bswap_32( n+i );
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev,
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
m512_const1_64( 0x0000000800000000 ) );
|
m512_const1_64( 0x0000000800000000 ) );
|
||||||
@@ -95,12 +94,12 @@ int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
|||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
x16r_4way_hash( hash, vdata );
|
if ( x16r_4way_hash( hash, vdata, thr_id ) )
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ )
|
||||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n+i );
|
pdata[19] = bswap_32( n+i );
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev,
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
m256_const1_64( 0x0000000400000000 ) );
|
m256_const1_64( 0x0000000400000000 ) );
|
||||||
|
@@ -36,8 +36,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
|||||||
do
|
do
|
||||||
{
|
{
|
||||||
edata[19] = nonce;
|
edata[19] = nonce;
|
||||||
x16r_hash( hash32, edata );
|
if ( x16r_hash( hash32, edata, thr_id ) )
|
||||||
|
|
||||||
if ( valid_hash( hash32, ptarget ) && !bench )
|
if ( valid_hash( hash32, ptarget ) && !bench )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( nonce );
|
pdata[19] = bswap_32( nonce );
|
||||||
@@ -46,7 +45,7 @@ int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
|||||||
nonce++;
|
nonce++;
|
||||||
} while ( nonce < max_nonce && !(*restart) );
|
} while ( nonce < max_nonce && !(*restart) );
|
||||||
pdata[19] = nonce;
|
pdata[19] = nonce;
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -35,9 +35,6 @@
|
|||||||
|
|
||||||
#if defined (X16RV2_8WAY)
|
#if defined (X16RV2_8WAY)
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
|
||||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
|
||||||
|
|
||||||
union _x16rv2_8way_context_overlay
|
union _x16rv2_8way_context_overlay
|
||||||
{
|
{
|
||||||
blake512_8way_context blake;
|
blake512_8way_context blake;
|
||||||
@@ -60,7 +57,7 @@ union _x16rv2_8way_context_overlay
|
|||||||
echo_4way_context echo;
|
echo_4way_context echo;
|
||||||
#else
|
#else
|
||||||
hashState_groestl groestl;
|
hashState_groestl groestl;
|
||||||
sph_shavite512_context shavite;
|
shavite512_context shavite;
|
||||||
hashState_echo echo;
|
hashState_echo echo;
|
||||||
#endif
|
#endif
|
||||||
} __attribute__ ((aligned (64)));
|
} __attribute__ ((aligned (64)));
|
||||||
@@ -68,7 +65,7 @@ union _x16rv2_8way_context_overlay
|
|||||||
typedef union _x16rv2_8way_context_overlay x16rv2_8way_context_overlay;
|
typedef union _x16rv2_8way_context_overlay x16rv2_8way_context_overlay;
|
||||||
static __thread x16rv2_8way_context_overlay x16rv2_ctx;
|
static __thread x16rv2_8way_context_overlay x16rv2_ctx;
|
||||||
|
|
||||||
void x16rv2_8way_hash( void* output, const void* input )
|
int x16rv2_8way_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t vhash[24*8] __attribute__ ((aligned (128)));
|
uint32_t vhash[24*8] __attribute__ ((aligned (128)));
|
||||||
uint32_t hash0[24] __attribute__ ((aligned (64)));
|
uint32_t hash0[24] __attribute__ ((aligned (64)));
|
||||||
@@ -96,7 +93,7 @@ void x16rv2_8way_hash( void* output, const void* input )
|
|||||||
|
|
||||||
for ( int i = 0; i < 16; i++ )
|
for ( int i = 0; i < 16; i++ )
|
||||||
{
|
{
|
||||||
const char elem = hashOrder[i];
|
const char elem = x16r_hash_order[i];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
@@ -374,30 +371,14 @@ void x16rv2_8way_hash( void* output, const void* input )
|
|||||||
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
|
shavite512_4way_full( &ctx.shavite, vhash, vhash, size );
|
||||||
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
|
||||||
#else
|
#else
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||||
sph_shavite512( &ctx.shavite, in0, size );
|
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
shavite512_full( &ctx.shavite, hash2, in2, size );
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash3, in3, size );
|
||||||
sph_shavite512( &ctx.shavite, in1, size );
|
shavite512_full( &ctx.shavite, hash4, in4, size );
|
||||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
shavite512_full( &ctx.shavite, hash5, in5, size );
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash6, in6, size );
|
||||||
sph_shavite512( &ctx.shavite, in2, size );
|
shavite512_full( &ctx.shavite, hash7, in7, size );
|
||||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in3, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in4, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash4 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in5, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash5 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in6, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash6 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in7, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash7 );
|
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case SIMD:
|
case SIMD:
|
||||||
@@ -451,30 +432,14 @@ void x16rv2_8way_hash( void* output, const void* input )
|
|||||||
hash7, vhash );
|
hash7, vhash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
|
||||||
sph_fugue512( &ctx.fugue, in0, size );
|
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
|
||||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
|
||||||
sph_fugue512( &ctx.fugue, in1, size );
|
sph_fugue512_full( &ctx.fugue, hash4, in4, size );
|
||||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
sph_fugue512_full( &ctx.fugue, hash5, in5, size );
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash6, in6, size );
|
||||||
sph_fugue512( &ctx.fugue, in2, size );
|
sph_fugue512_full( &ctx.fugue, hash7, in7, size );
|
||||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in3, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in4, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash4 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in5, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash5 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in6, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash6 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in7, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash7 );
|
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
|
||||||
@@ -519,30 +484,14 @@ void x16rv2_8way_hash( void* output, const void* input )
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
|
||||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
|
||||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
sph_whirlpool512_full( &ctx.whirlpool, hash4, in4, size );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
sph_whirlpool512_full( &ctx.whirlpool, hash5, in5, size );
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool512_full( &ctx.whirlpool, hash6, in6, size );
|
||||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
sph_whirlpool512_full( &ctx.whirlpool, hash7, in7, size );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in4, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash4 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in5, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash5 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in6, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash6 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in7, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash7 );
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SHA_512:
|
case SHA_512:
|
||||||
@@ -614,6 +563,9 @@ void x16rv2_8way_hash( void* output, const void* input )
|
|||||||
hash7, vhash );
|
hash7, vhash );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
size = 64;
|
size = 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -625,6 +577,7 @@ void x16rv2_8way_hash( void* output, const void* input )
|
|||||||
memcpy( output+160, hash5, 32 );
|
memcpy( output+160, hash5, 32 );
|
||||||
memcpy( output+192, hash6, 32 );
|
memcpy( output+192, hash6, 32 );
|
||||||
memcpy( output+224, hash7, 32 );
|
memcpy( output+224, hash7, 32 );
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||||
@@ -651,17 +604,19 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
bedata1[0] = bswap_32( pdata[1] );
|
bedata1[0] = bswap_32( pdata[1] );
|
||||||
bedata1[1] = bswap_32( pdata[2] );
|
bedata1[1] = bswap_32( pdata[2] );
|
||||||
|
|
||||||
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
const uint32_t ntime = bswap_32( pdata[17] );
|
const uint32_t ntime = bswap_32( pdata[17] );
|
||||||
if ( s_ntime != ntime )
|
if ( s_ntime != ntime )
|
||||||
{
|
{
|
||||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||||
s_ntime = ntime;
|
s_ntime = ntime;
|
||||||
if ( opt_debug && !thr_id )
|
if ( opt_debug && !thr_id )
|
||||||
applog( LOG_INFO, "hash order %s (%08x)", hashOrder, ntime );
|
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||||
const char elem = hashOrder[0];
|
const char elem = x16r_hash_order[0];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
{
|
{
|
||||||
@@ -718,13 +673,12 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
|||||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
x16rv2_8way_hash( hash, vdata );
|
if ( x16rv2_8way_hash( hash, vdata, thr_id ) )
|
||||||
|
|
||||||
for ( int i = 0; i < 8; i++ )
|
for ( int i = 0; i < 8; i++ )
|
||||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n+i );
|
pdata[19] = bswap_32( n+i );
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev,
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
m512_const1_64( 0x0000000800000000 ) );
|
m512_const1_64( 0x0000000800000000 ) );
|
||||||
@@ -737,9 +691,6 @@ int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
#elif defined (X16RV2_4WAY)
|
#elif defined (X16RV2_4WAY)
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
|
||||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
|
||||||
|
|
||||||
union _x16rv2_4way_context_overlay
|
union _x16rv2_4way_context_overlay
|
||||||
{
|
{
|
||||||
blake512_4way_context blake;
|
blake512_4way_context blake;
|
||||||
@@ -751,7 +702,7 @@ union _x16rv2_4way_context_overlay
|
|||||||
keccak512_4way_context keccak;
|
keccak512_4way_context keccak;
|
||||||
luffa_2way_context luffa;
|
luffa_2way_context luffa;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
sph_shavite512_context shavite;
|
shavite512_context shavite;
|
||||||
simd_2way_context simd;
|
simd_2way_context simd;
|
||||||
hamsi512_4way_context hamsi;
|
hamsi512_4way_context hamsi;
|
||||||
sph_fugue512_context fugue;
|
sph_fugue512_context fugue;
|
||||||
@@ -770,7 +721,7 @@ inline void padtiger512( uint32_t* hash )
|
|||||||
for ( int i = 6; i < 16; i++ ) hash[i] = 0;
|
for ( int i = 6; i < 16; i++ ) hash[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void x16rv2_4way_hash( void* output, const void* input )
|
int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t hash0[20] __attribute__ ((aligned (64)));
|
uint32_t hash0[20] __attribute__ ((aligned (64)));
|
||||||
uint32_t hash1[20] __attribute__ ((aligned (64)));
|
uint32_t hash1[20] __attribute__ ((aligned (64)));
|
||||||
@@ -789,7 +740,7 @@ void x16rv2_4way_hash( void* output, const void* input )
|
|||||||
|
|
||||||
for ( int i = 0; i < 16; i++ )
|
for ( int i = 0; i < 16; i++ )
|
||||||
{
|
{
|
||||||
const char elem = hashOrder[i];
|
const char elem = x16r_hash_order[i];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
@@ -875,7 +826,7 @@ void x16rv2_4way_hash( void* output, const void* input )
|
|||||||
break;
|
break;
|
||||||
case SKEIN:
|
case SKEIN:
|
||||||
if ( i == 0 )
|
if ( i == 0 )
|
||||||
skein512_4way_update( &ctx.skein, input + (64<<2), 16 );
|
skein512_4way_final16( &ctx.skein, vhash, input + (64*4) );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
@@ -959,18 +910,10 @@ void x16rv2_4way_hash( void* output, const void* input )
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SHAVITE:
|
case SHAVITE:
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash0, in0, size );
|
||||||
sph_shavite512( &ctx.shavite, in0, size );
|
shavite512_full( &ctx.shavite, hash1, in1, size );
|
||||||
sph_shavite512_close( &ctx.shavite, hash0 );
|
shavite512_full( &ctx.shavite, hash2, in2, size );
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash3, in3, size );
|
||||||
sph_shavite512( &ctx.shavite, in1, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in2, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash2 );
|
|
||||||
sph_shavite512_init( &ctx.shavite );
|
|
||||||
sph_shavite512( &ctx.shavite, in3, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash3 );
|
|
||||||
break;
|
break;
|
||||||
case SIMD:
|
case SIMD:
|
||||||
intrlv_2x128( vhash, in0, in1, size<<3 );
|
intrlv_2x128( vhash, in0, in1, size<<3 );
|
||||||
@@ -1003,18 +946,10 @@ void x16rv2_4way_hash( void* output, const void* input )
|
|||||||
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash0, in0, size );
|
||||||
sph_fugue512( &ctx.fugue, in0, size );
|
sph_fugue512_full( &ctx.fugue, hash1, in1, size );
|
||||||
sph_fugue512_close( &ctx.fugue, hash0 );
|
sph_fugue512_full( &ctx.fugue, hash2, in2, size );
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash3, in3, size );
|
||||||
sph_fugue512( &ctx.fugue, in1, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash1 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in2, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash2 );
|
|
||||||
sph_fugue512_init( &ctx.fugue );
|
|
||||||
sph_fugue512( &ctx.fugue, in3, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
|
||||||
@@ -1045,18 +980,10 @@ void x16rv2_4way_hash( void* output, const void* input )
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool512_full( &ctx.whirlpool, hash0, in0, size );
|
||||||
sph_whirlpool( &ctx.whirlpool, in0, size );
|
sph_whirlpool512_full( &ctx.whirlpool, hash1, in1, size );
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash0 );
|
sph_whirlpool512_full( &ctx.whirlpool, hash2, in2, size );
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool512_full( &ctx.whirlpool, hash3, in3, size );
|
||||||
sph_whirlpool( &ctx.whirlpool, in1, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash1 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in2, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash2 );
|
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
|
||||||
sph_whirlpool( &ctx.whirlpool, in3, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SHA_512:
|
case SHA_512:
|
||||||
@@ -1099,12 +1026,16 @@ void x16rv2_4way_hash( void* output, const void* input )
|
|||||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
size = 64;
|
size = 64;
|
||||||
}
|
}
|
||||||
memcpy( output, hash0, 32 );
|
memcpy( output, hash0, 32 );
|
||||||
memcpy( output+32, hash1, 32 );
|
memcpy( output+32, hash1, 32 );
|
||||||
memcpy( output+64, hash2, 32 );
|
memcpy( output+64, hash2, 32 );
|
||||||
memcpy( output+96, hash3, 32 );
|
memcpy( output+96, hash3, 32 );
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||||
@@ -1121,7 +1052,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
const uint32_t last_nonce = max_nonce - 4;
|
const uint32_t last_nonce = max_nonce - 4;
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
const int thr_id = mythr->id;
|
const int thr_id = mythr->id;
|
||||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
__m256i *noncev = (__m256i*)vdata + 9;
|
||||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
const bool bench = opt_benchmark;
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
@@ -1130,17 +1061,19 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
bedata1[0] = bswap_32( pdata[1] );
|
bedata1[0] = bswap_32( pdata[1] );
|
||||||
bedata1[1] = bswap_32( pdata[2] );
|
bedata1[1] = bswap_32( pdata[2] );
|
||||||
|
|
||||||
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
const uint32_t ntime = bswap_32(pdata[17]);
|
const uint32_t ntime = bswap_32(pdata[17]);
|
||||||
if ( s_ntime != ntime )
|
if ( s_ntime != ntime )
|
||||||
{
|
{
|
||||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, hashOrder );
|
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||||
s_ntime = ntime;
|
s_ntime = ntime;
|
||||||
if ( opt_debug && !thr_id )
|
if ( opt_debug && !thr_id )
|
||||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||||
const char elem = hashOrder[0];
|
const char elem = x16r_hash_order[0];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
{
|
{
|
||||||
@@ -1159,8 +1092,7 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
break;
|
break;
|
||||||
case SKEIN:
|
case SKEIN:
|
||||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||||
skein512_4way_init( &x16rv2_ctx.skein );
|
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
|
||||||
skein512_4way_update( &x16rv2_ctx.skein, vdata, 64 );
|
|
||||||
break;
|
break;
|
||||||
case CUBEHASH:
|
case CUBEHASH:
|
||||||
mm128_bswap32_80( edata, pdata );
|
mm128_bswap32_80( edata, pdata );
|
||||||
@@ -1194,12 +1126,12 @@ int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
x16rv2_4way_hash( hash, vdata );
|
if ( x16rv2_4way_hash( hash, vdata, thr_id ) )
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ )
|
||||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n+i );
|
pdata[19] = bswap_32( n+i );
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev,
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
m256_const1_64( 0x0000000400000000 ) );
|
m256_const1_64( 0x0000000400000000 ) );
|
||||||
|
@@ -34,7 +34,6 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
static __thread char hashOrder[X16R_HASH_FUNC_COUNT + 1] = { 0 };
|
|
||||||
|
|
||||||
union _x16rv2_context_overlay
|
union _x16rv2_context_overlay
|
||||||
{
|
{
|
||||||
@@ -52,7 +51,7 @@ union _x16rv2_context_overlay
|
|||||||
sph_keccak512_context keccak;
|
sph_keccak512_context keccak;
|
||||||
hashState_luffa luffa;
|
hashState_luffa luffa;
|
||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
sph_shavite512_context shavite;
|
shavite512_context shavite;
|
||||||
hashState_sd simd;
|
hashState_sd simd;
|
||||||
sph_hamsi512_context hamsi;
|
sph_hamsi512_context hamsi;
|
||||||
sph_fugue512_context fugue;
|
sph_fugue512_context fugue;
|
||||||
@@ -68,22 +67,16 @@ inline void padtiger512(uint32_t* hash) {
|
|||||||
for (int i = (24/4); i < (64/4); i++) hash[i] = 0;
|
for (int i = (24/4); i < (64/4); i++) hash[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void x16rv2_hash( void* output, const void* input )
|
int x16rv2_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash[16];
|
uint32_t _ALIGN(128) hash[16];
|
||||||
x16rv2_context_overlay ctx;
|
x16rv2_context_overlay ctx;
|
||||||
void *in = (void*) input;
|
void *in = (void*) input;
|
||||||
int size = 80;
|
int size = 80;
|
||||||
/*
|
|
||||||
if ( s_ntime == UINT32_MAX )
|
|
||||||
{
|
|
||||||
const uint8_t* in8 = (uint8_t*) input;
|
|
||||||
x16_r_s_getAlgoString( &in8[4], hashOrder );
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
for ( int i = 0; i < 16; i++ )
|
for ( int i = 0; i < 16; i++ )
|
||||||
{
|
{
|
||||||
const char elem = hashOrder[i];
|
const char elem = x16r_hash_order[i];
|
||||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||||
|
|
||||||
switch ( algo )
|
switch ( algo )
|
||||||
@@ -143,9 +136,7 @@ void x16rv2_hash( void* output, const void* input )
|
|||||||
(const byte*)in, size );
|
(const byte*)in, size );
|
||||||
break;
|
break;
|
||||||
case SHAVITE:
|
case SHAVITE:
|
||||||
sph_shavite512_init( &ctx.shavite );
|
shavite512_full( &ctx.shavite, hash, in, size );
|
||||||
sph_shavite512( &ctx.shavite, in, size );
|
|
||||||
sph_shavite512_close( &ctx.shavite, hash );
|
|
||||||
break;
|
break;
|
||||||
case SIMD:
|
case SIMD:
|
||||||
init_sd( &ctx.simd, 512 );
|
init_sd( &ctx.simd, 512 );
|
||||||
@@ -169,9 +160,7 @@ void x16rv2_hash( void* output, const void* input )
|
|||||||
sph_hamsi512_close( &ctx.hamsi, hash );
|
sph_hamsi512_close( &ctx.hamsi, hash );
|
||||||
break;
|
break;
|
||||||
case FUGUE:
|
case FUGUE:
|
||||||
sph_fugue512_init( &ctx.fugue );
|
sph_fugue512_full( &ctx.fugue, hash, in, size );
|
||||||
sph_fugue512( &ctx.fugue, in, size );
|
|
||||||
sph_fugue512_close( &ctx.fugue, hash );
|
|
||||||
break;
|
break;
|
||||||
case SHABAL:
|
case SHABAL:
|
||||||
sph_shabal512_init( &ctx.shabal );
|
sph_shabal512_init( &ctx.shabal );
|
||||||
@@ -179,9 +168,7 @@ void x16rv2_hash( void* output, const void* input )
|
|||||||
sph_shabal512_close( &ctx.shabal, hash );
|
sph_shabal512_close( &ctx.shabal, hash );
|
||||||
break;
|
break;
|
||||||
case WHIRLPOOL:
|
case WHIRLPOOL:
|
||||||
sph_whirlpool_init( &ctx.whirlpool );
|
sph_whirlpool512_full( &ctx.whirlpool, hash, in, size );
|
||||||
sph_whirlpool( &ctx.whirlpool, in, size );
|
|
||||||
sph_whirlpool_close( &ctx.whirlpool, hash );
|
|
||||||
break;
|
break;
|
||||||
case SHA_512:
|
case SHA_512:
|
||||||
sph_tiger_init( &ctx.tiger );
|
sph_tiger_init( &ctx.tiger );
|
||||||
@@ -193,58 +180,61 @@ void x16rv2_hash( void* output, const void* input )
|
|||||||
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
|
|
||||||
in = (void*) hash;
|
in = (void*) hash;
|
||||||
size = 64;
|
size = 64;
|
||||||
}
|
}
|
||||||
memcpy(output, hash, 32);
|
memcpy(output, hash, 32);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash32[8];
|
uint32_t _ALIGN(128) hash32[8];
|
||||||
uint32_t _ALIGN(128) endiandata[20];
|
uint32_t _ALIGN(128) edata[20];
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
const int thr_id = mythr->id;
|
||||||
uint32_t nonce = first_nonce;
|
uint32_t nonce = first_nonce;
|
||||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
|
||||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||||
|
|
||||||
|
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||||
if ( s_ntime != pdata[17] )
|
if ( s_ntime != pdata[17] )
|
||||||
{
|
{
|
||||||
uint32_t ntime = swab32(pdata[17]);
|
uint32_t ntime = swab32(pdata[17]);
|
||||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
x16_r_s_getAlgoString( (const uint8_t*) (&edata[1]), x16r_hash_order );
|
||||||
s_ntime = ntime;
|
s_ntime = ntime;
|
||||||
if ( opt_debug && !thr_id )
|
if ( opt_debug && !thr_id )
|
||||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
applog( LOG_DEBUG, "hash order %s (%08x)",
|
||||||
|
x16r_hash_order, ntime );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( opt_benchmark )
|
if ( bench ) ptarget[7] = 0x0cff;
|
||||||
ptarget[7] = 0x0cff;
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
be32enc( &endiandata[19], nonce );
|
edata[19] = nonce;
|
||||||
x16rv2_hash( hash32, endiandata );
|
if ( x16rv2_hash( hash32, edata, thr_id ) )
|
||||||
|
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
||||||
if ( hash32[7] <= Htarg )
|
|
||||||
if (fulltest( hash32, ptarget ) && !opt_benchmark )
|
|
||||||
{
|
{
|
||||||
pdata[19] = nonce;
|
pdata[19] = bswap_32( nonce );
|
||||||
submit_solution( work, hash32, mythr );
|
submit_solution( work, hash32, mythr );
|
||||||
}
|
}
|
||||||
nonce++;
|
nonce++;
|
||||||
} while ( nonce < max_nonce && !(*restart) );
|
} while ( nonce < max_nonce && !(*restart) );
|
||||||
pdata[19] = nonce;
|
pdata[19] = nonce;
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -30,7 +30,7 @@ union _x21s_8way_context_overlay
|
|||||||
|
|
||||||
typedef union _x21s_8way_context_overlay x21s_8way_context_overlay;
|
typedef union _x21s_8way_context_overlay x21s_8way_context_overlay;
|
||||||
|
|
||||||
void x21s_8way_hash( void* output, const void* input )
|
int x21s_8way_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t vhash[16*8] __attribute__ ((aligned (128)));
|
uint32_t vhash[16*8] __attribute__ ((aligned (128)));
|
||||||
uint8_t shash[64*8] __attribute__ ((aligned (64)));
|
uint8_t shash[64*8] __attribute__ ((aligned (64)));
|
||||||
@@ -44,7 +44,8 @@ void x21s_8way_hash( void* output, const void* input )
|
|||||||
uint32_t *hash7 = (uint32_t*)( shash+448 );
|
uint32_t *hash7 = (uint32_t*)( shash+448 );
|
||||||
x21s_8way_context_overlay ctx;
|
x21s_8way_context_overlay ctx;
|
||||||
|
|
||||||
x16r_8way_hash_generic( shash, input );
|
if ( !x16r_8way_hash_generic( shash, input, thrid ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
|
||||||
hash7 );
|
hash7 );
|
||||||
@@ -124,6 +125,8 @@ void x21s_8way_hash( void* output, const void* input )
|
|||||||
sha256_8way_init( &ctx.sha256 );
|
sha256_8way_init( &ctx.sha256 );
|
||||||
sha256_8way_update( &ctx.sha256, vhash, 64 );
|
sha256_8way_update( &ctx.sha256, vhash, 64 );
|
||||||
sha256_8way_close( &ctx.sha256, output );
|
sha256_8way_close( &ctx.sha256, output );
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
||||||
@@ -166,8 +169,7 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
|||||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
x21s_8way_hash( hash, vdata );
|
if ( x21s_8way_hash( hash, vdata, thr_id ) )
|
||||||
|
|
||||||
for ( int lane = 0; lane < 8; lane++ )
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
if ( unlikely( hash7[lane] <= Htarg ) )
|
||||||
{
|
{
|
||||||
@@ -175,7 +177,7 @@ int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
|||||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n + lane );
|
pdata[19] = bswap_32( n + lane );
|
||||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
submit_solution( work, lane_hash, mythr );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev,
|
*noncev = _mm512_add_epi32( *noncev,
|
||||||
@@ -215,7 +217,7 @@ union _x21s_4way_context_overlay
|
|||||||
|
|
||||||
typedef union _x21s_4way_context_overlay x21s_4way_context_overlay;
|
typedef union _x21s_4way_context_overlay x21s_4way_context_overlay;
|
||||||
|
|
||||||
void x21s_4way_hash( void* output, const void* input )
|
int x21s_4way_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
|
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
|
||||||
uint8_t shash[64*4] __attribute__ ((aligned (64)));
|
uint8_t shash[64*4] __attribute__ ((aligned (64)));
|
||||||
@@ -225,7 +227,8 @@ void x21s_4way_hash( void* output, const void* input )
|
|||||||
uint32_t *hash2 = (uint32_t*)( shash+128 );
|
uint32_t *hash2 = (uint32_t*)( shash+128 );
|
||||||
uint32_t *hash3 = (uint32_t*)( shash+192 );
|
uint32_t *hash3 = (uint32_t*)( shash+192 );
|
||||||
|
|
||||||
x16r_4way_hash_generic( shash, input );
|
if ( !x16r_4way_hash_generic( shash, input, thrid ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||||
|
|
||||||
@@ -299,6 +302,8 @@ void x21s_4way_hash( void* output, const void* input )
|
|||||||
dintrlv_4x32( output, output+32, output+64,output+96, vhash, 256 );
|
dintrlv_4x32( output, output+32, output+64,output+96, vhash, 256 );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||||
@@ -337,12 +342,12 @@ int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
|||||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
x21s_4way_hash( hash, vdata );
|
if ( x21s_4way_hash( hash, vdata, thr_id ) )
|
||||||
for ( int i = 0; i < 4; i++ )
|
for ( int i = 0; i < 4; i++ )
|
||||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( n+i );
|
pdata[19] = bswap_32( n+i );
|
||||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
submit_solution( work, hash+(i<<3), mythr );
|
||||||
}
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev,
|
*noncev = _mm256_add_epi32( *noncev,
|
||||||
m256_const1_64( 0x0000000400000000 ) );
|
m256_const1_64( 0x0000000400000000 ) );
|
||||||
|
@@ -27,12 +27,13 @@ union _x21s_context_overlay
|
|||||||
};
|
};
|
||||||
typedef union _x21s_context_overlay x21s_context_overlay;
|
typedef union _x21s_context_overlay x21s_context_overlay;
|
||||||
|
|
||||||
void x21s_hash( void* output, const void* input )
|
int x21s_hash( void* output, const void* input, int thrid )
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(128) hash[16];
|
uint32_t _ALIGN(128) hash[16];
|
||||||
x21s_context_overlay ctx;
|
x21s_context_overlay ctx;
|
||||||
|
|
||||||
x16r_hash_generic( hash, input );
|
if ( !x16r_hash_generic( hash, input, thrid ) )
|
||||||
|
return 0;
|
||||||
|
|
||||||
sph_haval256_5_init( &ctx.haval );
|
sph_haval256_5_init( &ctx.haval );
|
||||||
sph_haval256_5( &ctx.haval, (const void*) hash, 64) ;
|
sph_haval256_5( &ctx.haval, (const void*) hash, 64) ;
|
||||||
@@ -54,6 +55,8 @@ void x21s_hash( void* output, const void* input )
|
|||||||
SHA256_Final( (unsigned char*)hash, &ctx.sha256 );
|
SHA256_Final( (unsigned char*)hash, &ctx.sha256 );
|
||||||
|
|
||||||
memcpy( output, hash, 32 );
|
memcpy( output, hash, 32 );
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
||||||
@@ -87,8 +90,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
|||||||
do
|
do
|
||||||
{
|
{
|
||||||
edata[19] = nonce;
|
edata[19] = nonce;
|
||||||
x21s_hash( hash32, edata );
|
if ( x21s_hash( hash32, edata, thr_id ) )
|
||||||
|
|
||||||
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
||||||
{
|
{
|
||||||
pdata[19] = bswap_32( nonce );
|
pdata[19] = bswap_32( nonce );
|
||||||
@@ -97,7 +99,7 @@ int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
|||||||
nonce++;
|
nonce++;
|
||||||
} while ( nonce < max_nonce && !(*restart) );
|
} while ( nonce < max_nonce && !(*restart) );
|
||||||
pdata[19] = nonce;
|
pdata[19] = nonce;
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -10,7 +10,7 @@ bool register_sonoa_algo( algo_gate_t* gate )
|
|||||||
gate->hash = (void*)&sonoa_4way_hash;
|
gate->hash = (void*)&sonoa_4way_hash;
|
||||||
#else
|
#else
|
||||||
init_sonoa_ctx();
|
init_sonoa_ctx();
|
||||||
gate->scanhash = (void*)&scanhash_sonoa;
|
// gate->scanhash = (void*)&scanhash_sonoa;
|
||||||
gate->hash = (void*)&sonoa_hash;
|
gate->hash = (void*)&sonoa_hash;
|
||||||
#endif
|
#endif
|
||||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||||
|
@@ -14,19 +14,19 @@ bool register_sonoa_algo( algo_gate_t* gate );
|
|||||||
|
|
||||||
#if defined(SONOA_8WAY)
|
#if defined(SONOA_8WAY)
|
||||||
|
|
||||||
void sonoa_8way_hash( void *state, const void *input );
|
int sonoa_8way_hash( void *state, const void *input, int thrid );
|
||||||
int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#elif defined(SONOA_4WAY)
|
#elif defined(SONOA_4WAY)
|
||||||
|
|
||||||
void sonoa_4way_hash( void *state, const void *input );
|
int sonoa_4way_hash( void *state, const void *input, int thrid );
|
||||||
int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void sonoa_hash( void *state, const void *input );
|
int sonoa_hash( void *state, const void *input, int thrid );
|
||||||
int scanhash_sonoa( struct work *work, uint32_t max_nonce,
|
int scanhash_sonoa( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
void init_sonoa_ctx();
|
void init_sonoa_ctx();
|
||||||
|
@@ -83,7 +83,7 @@ void init_sonoa_ctx()
|
|||||||
sph_haval256_5_init(&sonoa_ctx.haval);
|
sph_haval256_5_init(&sonoa_ctx.haval);
|
||||||
};
|
};
|
||||||
|
|
||||||
void sonoa_hash( void *state, const void *input )
|
int sonoa_hash( void *state, const void *input, int thrid )
|
||||||
{
|
{
|
||||||
uint8_t hash[128] __attribute__ ((aligned (64)));
|
uint8_t hash[128] __attribute__ ((aligned (64)));
|
||||||
sonoa_ctx_holder ctx __attribute__ ((aligned (64)));
|
sonoa_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||||
@@ -132,6 +132,7 @@ void sonoa_hash( void *state, const void *input )
|
|||||||
sph_echo512_close(&ctx.echo, hash);
|
sph_echo512_close(&ctx.echo, hash);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
//
|
//
|
||||||
|
|
||||||
sph_bmw512_init( &ctx.bmw);
|
sph_bmw512_init( &ctx.bmw);
|
||||||
@@ -189,6 +190,7 @@ void sonoa_hash( void *state, const void *input )
|
|||||||
sph_hamsi512(&ctx.hamsi, hash, 64);
|
sph_hamsi512(&ctx.hamsi, hash, 64);
|
||||||
sph_hamsi512_close(&ctx.hamsi, hash);
|
sph_hamsi512_close(&ctx.hamsi, hash);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
//
|
//
|
||||||
|
|
||||||
sph_bmw512_init( &ctx.bmw);
|
sph_bmw512_init( &ctx.bmw);
|
||||||
@@ -250,6 +252,7 @@ void sonoa_hash( void *state, const void *input )
|
|||||||
sph_fugue512(&ctx.fugue, hash, 64);
|
sph_fugue512(&ctx.fugue, hash, 64);
|
||||||
sph_fugue512_close(&ctx.fugue, hash);
|
sph_fugue512_close(&ctx.fugue, hash);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
//
|
//
|
||||||
|
|
||||||
sph_bmw512_init( &ctx.bmw);
|
sph_bmw512_init( &ctx.bmw);
|
||||||
@@ -333,6 +336,7 @@ void sonoa_hash( void *state, const void *input )
|
|||||||
sph_shavite512(&ctx.shavite, hash, 64);
|
sph_shavite512(&ctx.shavite, hash, 64);
|
||||||
sph_shavite512_close(&ctx.shavite, hash);
|
sph_shavite512_close(&ctx.shavite, hash);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
//
|
//
|
||||||
|
|
||||||
sph_bmw512_init( &ctx.bmw);
|
sph_bmw512_init( &ctx.bmw);
|
||||||
@@ -406,6 +410,7 @@ void sonoa_hash( void *state, const void *input )
|
|||||||
sph_whirlpool(&ctx.whirlpool, hash, 64);
|
sph_whirlpool(&ctx.whirlpool, hash, 64);
|
||||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
//
|
//
|
||||||
sph_bmw512_init( &ctx.bmw);
|
sph_bmw512_init( &ctx.bmw);
|
||||||
sph_bmw512(&ctx.bmw, hash, 64);
|
sph_bmw512(&ctx.bmw, hash, 64);
|
||||||
@@ -482,6 +487,7 @@ void sonoa_hash( void *state, const void *input )
|
|||||||
sph_whirlpool(&ctx.whirlpool, hash, 64);
|
sph_whirlpool(&ctx.whirlpool, hash, 64);
|
||||||
sph_whirlpool_close(&ctx.whirlpool, hash);
|
sph_whirlpool_close(&ctx.whirlpool, hash);
|
||||||
|
|
||||||
|
if ( work_restart[thrid].restart ) return 0;
|
||||||
//
|
//
|
||||||
|
|
||||||
sph_bmw512_init( &ctx.bmw);
|
sph_bmw512_init( &ctx.bmw);
|
||||||
@@ -560,64 +566,7 @@ void sonoa_hash( void *state, const void *input )
|
|||||||
sph_haval256_5_close(&ctx.haval, hash);
|
sph_haval256_5_close(&ctx.haval, hash);
|
||||||
|
|
||||||
memcpy(state, hash, 32);
|
memcpy(state, hash, 32);
|
||||||
}
|
return 1;
|
||||||
|
|
||||||
int scanhash_sonoa( struct work *work, uint32_t max_nonce,
|
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
|
||||||
{
|
|
||||||
uint32_t _ALIGN(128) hash32[8];
|
|
||||||
uint32_t _ALIGN(128) endiandata[20];
|
|
||||||
uint32_t *pdata = work->data;
|
|
||||||
uint32_t *ptarget = work->target;
|
|
||||||
const uint32_t first_nonce = pdata[19];
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
uint32_t n = pdata[19] - 1;
|
|
||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
|
||||||
|
|
||||||
uint64_t htmax[] =
|
|
||||||
{
|
|
||||||
0,
|
|
||||||
0xF,
|
|
||||||
0xFF,
|
|
||||||
0xFFF,
|
|
||||||
0xFFFF,
|
|
||||||
0x10000000
|
|
||||||
};
|
|
||||||
uint32_t masks[] =
|
|
||||||
{
|
|
||||||
0xFFFFFFFF,
|
|
||||||
0xFFFFFFF0,
|
|
||||||
0xFFFFFF00,
|
|
||||||
0xFFFFF000,
|
|
||||||
0xFFFF0000,
|
|
||||||
0
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// we need bigendian data...
|
|
||||||
casti_m128i( endiandata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
|
||||||
casti_m128i( endiandata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
|
||||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
|
||||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
|
||||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
|
||||||
|
|
||||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
|
||||||
{
|
|
||||||
uint32_t mask = masks[m];
|
|
||||||
do
|
|
||||||
{
|
|
||||||
pdata[19] = ++n;
|
|
||||||
be32enc(&endiandata[19], n);
|
|
||||||
sonoa_hash(hash32, endiandata);
|
|
||||||
if ( !( hash32[7] & mask ) )
|
|
||||||
if ( fulltest( hash32, ptarget ) && !opt_benchmark )
|
|
||||||
submit_solution( work, hash32, mythr );
|
|
||||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
|
||||||
pdata[19] = n;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user