mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
23 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
a51f59086b | ||
![]() |
6f49ba09b7 | ||
![]() |
e2d5762ef2 | ||
![]() |
e625ed5420 | ||
![]() |
9abc19a30a | ||
![]() |
0d769ee0fe | ||
![]() |
0d48d573ce | ||
![]() |
d6e8d7a46e | ||
![]() |
71d6b97ee8 | ||
![]() |
b2331375a3 | ||
![]() |
7fec680835 | ||
![]() |
1b0a5aadf6 | ||
![]() |
0a3c52810e | ||
![]() |
4d4386a374 | ||
![]() |
ce259b915a | ||
![]() |
02202ab803 | ||
![]() |
77c5ae80ab | ||
![]() |
eb3f57bfc7 | ||
![]() |
e1aead3c76 | ||
![]() |
bfd1c002f9 | ||
![]() |
9edc650042 | ||
![]() |
218cef337a | ||
![]() |
9ffce7bdb7 |
123
INSTALL_LINUX
Normal file
123
INSTALL_LINUX
Normal file
@@ -0,0 +1,123 @@
|
||||
|
||||
|
||||
Requirements:
|
||||
|
||||
Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
|
||||
supported.
|
||||
64 bit Linux operating system. Apple is not supported.
|
||||
|
||||
Building on linux prerequisites:
|
||||
|
||||
It is assumed users know how to install packages on their system and
|
||||
be able to compile standard source packages. This is basic Linux and
|
||||
beyond the scope of cpuminer-opt. Regardless compiling is trivial if you
|
||||
follow the instructions.
|
||||
|
||||
Make sure you have the basic development packages installed.
|
||||
Here is a good start:
|
||||
|
||||
http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu
|
||||
|
||||
Install any additional dependencies needed by cpuminer-opt. The list below
|
||||
are some of the ones that may not be in the default install and need to
|
||||
be installed manually. There may be others, read the error messages they
|
||||
will give a clue as to the missing package.
|
||||
|
||||
The following command should install everything you need on Debian based
|
||||
distributions such as Ubuntu:
|
||||
|
||||
sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev automake zlib1g-dev
|
||||
|
||||
build-essential (Development Tools package group on Fedora)
|
||||
automake
|
||||
libjansson-dev
|
||||
libgmp-dev
|
||||
libcurl4-openssl-dev
|
||||
libssl-dev
|
||||
lib-thread
|
||||
zlib1g-dev
|
||||
|
||||
SHA support on AMD Ryzen CPUs requires gcc version 5 or higher and
|
||||
openssl 1.1.0e or higher. Add one of the following, depending on the
|
||||
compiler version, to CFLAGS:
|
||||
"-march=native" or "-march=znver1" or "-msha".
|
||||
|
||||
Additional instructions for static compilalation can be found here:
|
||||
https://lxadm.com/Static_compilation_of_cpuminer
|
||||
Static builds should only considered in a homogeneous HW and SW environment.
|
||||
Local builds will always have the best performance and compatibility.
|
||||
|
||||
Extract cpuminer source.
|
||||
|
||||
tar xvzf cpuminer-opt-x.y.z.tar.gz
|
||||
cd cpuminer-opt-x.y.z
|
||||
|
||||
Run ./build.sh to build on Linux or execute the following commands.
|
||||
|
||||
./autogen.sh
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
make
|
||||
|
||||
Start mining.
|
||||
|
||||
./cpuminer -a algo -o url -u username -p password
|
||||
|
||||
Windows
|
||||
|
||||
Precompiled Windows binaries are built on a Linux host using Mingw
|
||||
with a more recent compiler than the following Windows hosted procedure.
|
||||
|
||||
Building on Windows prerequisites:
|
||||
|
||||
msys
|
||||
mingw_w64
|
||||
Visual C++ redistributable 2008 X64
|
||||
openssl
|
||||
|
||||
Install msys and mingw_w64, only needed once.
|
||||
|
||||
Unpack msys into C:\msys or your preferred directory.
|
||||
|
||||
Install mingw_w64 from win-builds.
|
||||
Follow instructions, check "msys or cygwin" and "x86_64" and accept default
|
||||
existing msys instalation.
|
||||
|
||||
Open a msys shell by double clicking on msys.bat.
|
||||
Note that msys shell uses linux syntax for file specifications, "C:\" is
|
||||
mounted at "/c/".
|
||||
|
||||
Add mingw bin directory to PATH variable
|
||||
PATH="/c/msys/opt/windows_64/bin/:$PATH"
|
||||
|
||||
Instalation complete, compile cpuminer-opt.
|
||||
|
||||
Unpack cpuminer-opt source files using tar from msys shell, or using 7zip
|
||||
or similar Windows program.
|
||||
|
||||
In msys shell cd to miner directory.
|
||||
cd /c/path/to/cpuminer-opt
|
||||
|
||||
Run build.sh to build on Windows or execute the following commands.
|
||||
|
||||
./autogen.sh
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
make
|
||||
|
||||
Start mining
|
||||
|
||||
cpuminer.exe -a algo -o url -u user -p password
|
||||
|
||||
The following tips may be useful for older AMD CPUs.
|
||||
|
||||
AMD CPUs older than Steamroller, including Athlon x2 and Phenom II x4, are
|
||||
not supported by cpuminer-opt due to an incompatible implementation of SSE2
|
||||
on these CPUs. Some algos may crash the miner with an invalid instruction.
|
||||
Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
||||
|
||||
Some users with AMD CPUs without AES_NI have reported problems compiling
|
||||
with build.sh or "-march=native". Problems have included compile errors
|
||||
and poor performance. These users are recommended to compile manually
|
||||
specifying "-march=btver1" on the configure command line.
|
||||
|
||||
Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
||||
|
173
INSTALL_WINDOWS
Normal file
173
INSTALL_WINDOWS
Normal file
@@ -0,0 +1,173 @@
|
||||
Instructions for compiling cpuminer-opt for Windows.
|
||||
|
||||
|
||||
Windows compilation using Visual Studio is not supported. Mingw64 is
|
||||
used on a Linux system (bare metal or virtual machine) to cross-compile
|
||||
cpuminer-opt executable binaries for Windows.
|
||||
|
||||
These instructions were written for Debian and Ubuntu compatible distributions
|
||||
but should work on other major distributions as well. However some of the
|
||||
package names or file paths may be different.
|
||||
|
||||
It is assumed a Linux system is already available and running. And the user
|
||||
has enough Linux knowledge to find and install packages and follow these
|
||||
instructions.
|
||||
|
||||
First it is a good idea to create new user specifically for cross compiling.
|
||||
It keeps all mingw stuff contained and isolated from the rest of the system.
|
||||
|
||||
Step by step...
|
||||
|
||||
1. Install necessary packages from the distribution's repositories.
|
||||
|
||||
Refer to Linux compile instructions and install required packages.
|
||||
|
||||
Additionally, install mingw-64.
|
||||
|
||||
sudo apt-get install mingw-w64
|
||||
|
||||
|
||||
2. Create a local library directory for packages to be compiled in the next
|
||||
step. Recommended location is $HOME/usr/lib/
|
||||
|
||||
|
||||
3. Download and build other packages for mingw that don't have a mingw64
|
||||
version available in the repositories.
|
||||
|
||||
Download the following source code packages from their respective and
|
||||
respected download locations, copy them to ~/usr/lib/ and uncompress them.
|
||||
|
||||
openssl
|
||||
curl
|
||||
gmp
|
||||
|
||||
In most cases the latest vesrion is ok but it's safest to download
|
||||
the same major and minor version as included in your distribution.
|
||||
|
||||
Run the following commands or follow the supplied instructions.
|
||||
Do not run "make install" unless you are using ~/usr/lib, which isn't
|
||||
recommended.
|
||||
|
||||
Some instructions insist on running "make check". If make check fails
|
||||
it may still work, YMMV.
|
||||
|
||||
You can speed up "make" by using all CPU cores available with "-j n" where
|
||||
n is the number of CPU threads you want to use.
|
||||
|
||||
openssl:
|
||||
|
||||
./Configure mingw64 shared --cross-compile-prefix=x86_64-w64-mingw32
|
||||
make
|
||||
|
||||
curl:
|
||||
|
||||
./configure --with-winssl --with-winidn --host=x86_64-w64-mingw32
|
||||
make
|
||||
|
||||
gmp:
|
||||
|
||||
./configure --host=x86_64-w64-mingw32
|
||||
make
|
||||
|
||||
|
||||
|
||||
4. Tweak the environment.
|
||||
|
||||
This step is required everytime you login or the commands can be added to
|
||||
.bashrc.
|
||||
|
||||
Define some local variables to point to local library.
|
||||
|
||||
export LOCAL_LIB="$HOME/usr/lib"
|
||||
|
||||
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl"
|
||||
|
||||
export CONFIGURE_ARGS="--with-curl=$LOCAL_LIB/curl --with-crypto=$LOCAL_LIB/openssl --host=x86_64-w64-mingw32"
|
||||
|
||||
Create a release directory and copy some dll files previously built.
|
||||
This can be done outside of cpuminer-opt and only needs to be done once.
|
||||
If the release directory is in cpuminer-opt directory it needs to be
|
||||
recreated every a source package is decompressed.
|
||||
|
||||
mkdir release
|
||||
cp /usr/x86_64-w64-mingw32/lib/zlib1.dll release/
|
||||
cp /usr/x86_64-w64-mingw32/lib/libwinpthread-1.dll release/
|
||||
cp /usr/lib/gcc/x86_64-w64-mingw32/7.3-win32/libstdc++-6.dll release/
|
||||
cp /usr/lib/gcc/x86_64-w64-mingw32/7.3-win32/libgcc_s_seh-1.dll release/
|
||||
cp $LOCAL_LIB/openssl/libcrypto-1_1-x64.dll release/
|
||||
cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/
|
||||
|
||||
|
||||
|
||||
The following steps need to be done every time a new source package is
|
||||
opened.
|
||||
|
||||
5. Download cpuminer-opt
|
||||
|
||||
Download the latest source code package of cpumuner-opt to your desired
|
||||
location. .zip or .tar.gz, your choice.
|
||||
|
||||
https://github.com/JayDDee/cpuminer-opt/releases
|
||||
|
||||
Decompress and change to the cpuminer-opt directory.
|
||||
|
||||
|
||||
|
||||
6. Prepare to compile
|
||||
|
||||
Create a link to the locally compiled version of gmp.h
|
||||
|
||||
ln -s $LOCAL_LIB/gmp-version/gmp.h ./gmp.h
|
||||
|
||||
Edit configure.ac to fix lipthread package name.
|
||||
|
||||
sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
|
||||
|
||||
|
||||
7. Compile
|
||||
|
||||
you can use the default compile if you intend to use cpuminer-opt on the
|
||||
same CPU and the virtual machine supports that architecture.
|
||||
|
||||
./build.sh
|
||||
|
||||
Otherwise you can compile manually while setting options in CFLAGS.
|
||||
|
||||
Some common options:
|
||||
|
||||
To compile for a specific CPU architecture:
|
||||
|
||||
CFLAGS="-O3 -march=znver1 -Wall" ./configure --with-curl
|
||||
|
||||
This will compile for AMD Ryzen.
|
||||
|
||||
You can compile more generically for a set of specific CPU features
|
||||
if you know what features you want:
|
||||
|
||||
CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure --with-curl
|
||||
|
||||
This will compile for an older CPU that does not have AVX.
|
||||
|
||||
You can find several examples in build-allarch.sh
|
||||
|
||||
If you have a CPU with more than 64 threads and Windows 7 or higher you
|
||||
can enable the CPU Groups feature:
|
||||
|
||||
-D_WIN32_WINNT==0x0601
|
||||
|
||||
Once you have run configure successfully run make with n CPU threads:
|
||||
|
||||
make -j n
|
||||
|
||||
Copy cpuminer.exe to the release directory, compress and copy the release
|
||||
directory to a Windows system and run cpuminer.exe from the command line.
|
||||
|
||||
Run cpuminer
|
||||
|
||||
In a command windows change directories to the unzipped release folder.
|
||||
to get a list of all options:
|
||||
|
||||
cpuminer.exe --help
|
||||
|
||||
Command options are specific to where you mine. Refer to the pool's
|
||||
instructions on how to set them.
|
61
Makefile.am
61
Makefile.am
@@ -42,10 +42,11 @@ cpuminer_SOURCES = \
|
||||
algo/argon2/argon2d/argon2d/argon2.c \
|
||||
algo/argon2/argon2d/argon2d/core.c \
|
||||
algo/argon2/argon2d/argon2d/opt.c \
|
||||
algo/argon2/argon2d/argon2d/thread.c \
|
||||
algo/argon2/argon2d/argon2d/argon2d_thread.c \
|
||||
algo/argon2/argon2d/argon2d/encoding.c \
|
||||
algo/blake/sph_blake.c \
|
||||
algo/blake/blake-hash-4way.c \
|
||||
algo/blake/blake256-hash-4way.c \
|
||||
algo/blake/blake512-hash-4way.c \
|
||||
algo/blake/blake-gate.c \
|
||||
algo/blake/blake.c \
|
||||
algo/blake/blake-4way.c \
|
||||
@@ -67,14 +68,18 @@ cpuminer_SOURCES = \
|
||||
algo/blake/pentablake-4way.c \
|
||||
algo/blake/pentablake.c \
|
||||
algo/bmw/sph_bmw.c \
|
||||
algo/bmw/bmw-hash-4way.c \
|
||||
algo/bmw/bmw256-hash-4way.c \
|
||||
algo/bmw/bmw512-hash-4way.c \
|
||||
algo/bmw/bmw256.c \
|
||||
algo/bmw/bmw512-gate.c \
|
||||
algo/bmw/bmw512.c \
|
||||
algo/bmw/bmw512-4way.c \
|
||||
algo/cryptonight/cryptolight.c \
|
||||
algo/cryptonight/cryptonight-common.c\
|
||||
algo/cryptonight/cryptonight-aesni.c\
|
||||
algo/cryptonight/cryptonight.c\
|
||||
algo/cubehash/sph_cubehash.c \
|
||||
algo/cubehash/sse2/cubehash_sse2.c\
|
||||
algo/cubehash/cubehash_sse2.c\
|
||||
algo/cubehash/cube-hash-2way.c \
|
||||
algo/echo/sph_echo.c \
|
||||
algo/echo/aes_ni/hash.c\
|
||||
@@ -116,33 +121,37 @@ cpuminer_SOURCES = \
|
||||
algo/luffa/luffa-hash-2way.c \
|
||||
algo/lyra2/lyra2.c \
|
||||
algo/lyra2/sponge.c \
|
||||
algo/lyra2/lyra2rev2-gate.c \
|
||||
algo/lyra2/lyra2-gate.c \
|
||||
algo/lyra2/lyra2rev2.c \
|
||||
algo/lyra2/lyra2rev2-4way.c \
|
||||
algo/lyra2/lyra2rev3.c \
|
||||
algo/lyra2/lyra2rev3-4way.c \
|
||||
algo/lyra2/lyra2re.c \
|
||||
algo/lyra2/lyra2z-gate.c \
|
||||
algo/lyra2/lyra2z.c \
|
||||
algo/lyra2/lyra2z-4way.c \
|
||||
algo/lyra2/lyra2z330.c \
|
||||
algo/lyra2/lyra2h-gate.c \
|
||||
algo/lyra2/lyra2h.c \
|
||||
algo/lyra2/lyra2h-4way.c \
|
||||
algo/lyra2/allium-gate.c \
|
||||
algo/lyra2/allium-4way.c \
|
||||
algo/lyra2/allium.c \
|
||||
algo/lyra2/phi2-4way.c \
|
||||
algo/lyra2/phi2.c \
|
||||
algo/m7m.c \
|
||||
algo/neoscrypt/neoscrypt.c \
|
||||
algo/nist5/nist5-gate.c \
|
||||
algo/nist5/nist5-4way.c \
|
||||
algo/nist5/nist5.c \
|
||||
algo/nist5/zr5.c \
|
||||
algo/pluck.c \
|
||||
algo/panama/sph_panama.c \
|
||||
algo/radiogatun/sph_radiogatun.c \
|
||||
algo/quark/quark-gate.c \
|
||||
algo/quark/quark.c \
|
||||
algo/quark/quark-4way.c \
|
||||
algo/quark/anime-gate.c \
|
||||
algo/quark/anime.c \
|
||||
algo/quark/anime-4way.c \
|
||||
algo/quark/hmq1725-gate.c \
|
||||
algo/quark/hmq1725-4way.c \
|
||||
algo/quark/hmq1725.c \
|
||||
algo/qubit/qubit-gate.c \
|
||||
algo/qubit/qubit.c \
|
||||
algo/qubit/qubit-2way.c \
|
||||
@@ -154,19 +163,25 @@ cpuminer_SOURCES = \
|
||||
algo/ripemd/lbry-gate.c \
|
||||
algo/ripemd/lbry.c \
|
||||
algo/ripemd/lbry-4way.c \
|
||||
algo/scrypt.c \
|
||||
algo/scrypt/scrypt.c \
|
||||
algo/scrypt/neoscrypt.c \
|
||||
algo/scrypt/pluck.c \
|
||||
algo/scryptjane/scrypt-jane.c \
|
||||
algo/sha/sph_sha2.c \
|
||||
algo/sha/sph_sha2big.c \
|
||||
algo/sha/sha2-hash-4way.c \
|
||||
algo/sha/sha256_hash_11way.c \
|
||||
algo/sha/sha2.c \
|
||||
algo/sha/sha256t-gate.c \
|
||||
algo/sha/sha256t-4way.c \
|
||||
algo/sha/sha256t.c \
|
||||
algo/sha/sha256q-4way.c \
|
||||
algo/sha/sha256q.c \
|
||||
algo/shabal/sph_shabal.c \
|
||||
algo/shabal/shabal-hash-4way.c \
|
||||
algo/shavite/sph_shavite.c \
|
||||
algo/shavite/sph-shavite-aesni.c \
|
||||
algo/shavite/shavite-hash-2way.c \
|
||||
algo/shavite/shavite.c \
|
||||
algo/simd/sph_simd.c \
|
||||
algo/simd/nist.c \
|
||||
@@ -186,7 +201,6 @@ cpuminer_SOURCES = \
|
||||
algo/whirlpool/sph_whirlpool.c \
|
||||
algo/whirlpool/whirlpool-hash-4way.c \
|
||||
algo/whirlpool/whirlpool-gate.c \
|
||||
algo/whirlpool/whirlpool-4way.c \
|
||||
algo/whirlpool/whirlpool.c \
|
||||
algo/whirlpool/whirlpoolx.c \
|
||||
algo/x11/x11-gate.c \
|
||||
@@ -227,6 +241,8 @@ cpuminer_SOURCES = \
|
||||
algo/x13/skunk-4way.c \
|
||||
algo/x13/skunk.c \
|
||||
algo/x13/drop.c \
|
||||
algo/x13/x13bcd-4way.c \
|
||||
algo/x13/x13bcd.c \
|
||||
algo/x14/x14-gate.c \
|
||||
algo/x14/x14.c \
|
||||
algo/x14/x14-4way.c \
|
||||
@@ -240,19 +256,30 @@ cpuminer_SOURCES = \
|
||||
algo/x15/x15-gate.c \
|
||||
algo/x15/x15.c \
|
||||
algo/x15/x15-4way.c \
|
||||
algo/x16/x16r-gate.c \
|
||||
algo/x16/x16r.c \
|
||||
algo/x16/x16r-4way.c \
|
||||
algo/x16/x16rt.c \
|
||||
algo/x16/x16rt-4way.c \
|
||||
algo/x16/hex.c \
|
||||
algo/x16/x21s-4way.c \
|
||||
algo/x16/x21s.c \
|
||||
algo/x17/x17-gate.c \
|
||||
algo/x17/x17.c \
|
||||
algo/x17/x17-4way.c \
|
||||
algo/x17/xevan-gate.c \
|
||||
algo/x17/xevan.c \
|
||||
algo/x17/xevan-4way.c \
|
||||
algo/x17/x16r-gate.c \
|
||||
algo/x17/x16r.c \
|
||||
algo/x17/x16r-4way.c \
|
||||
algo/x17/hmq1725.c \
|
||||
algo/x17/sonoa-gate.c \
|
||||
algo/x17/sonoa-4way.c \
|
||||
algo/x17/sonoa.c \
|
||||
algo/x20/x20r.c \
|
||||
algo/yescrypt/yescrypt.c \
|
||||
algo/yescrypt/sha256_Y.c \
|
||||
algo/yescrypt/yescrypt-best.c
|
||||
algo/yescrypt/yescrypt-best.c \
|
||||
algo/yespower/yespower.c \
|
||||
algo/yespower/sha256_p.c \
|
||||
algo/yespower/yespower-opt.c
|
||||
|
||||
disable_flags =
|
||||
|
||||
|
156
README.md
156
README.md
@@ -16,7 +16,8 @@ https://bitcointalk.org/index.php?topic=1326803.0
|
||||
|
||||
mailto://jayddee246@gmail.com
|
||||
|
||||
See file RELEASE_NOTES for change log and compile instructions.
|
||||
See file RELEASE_NOTES for change log and INSTALL_LINUX or INSTALL_WINDOWS
|
||||
for compile instructions.
|
||||
|
||||
Requirements
|
||||
------------
|
||||
@@ -45,86 +46,99 @@ MacOS, OSx and Android are not supported.
|
||||
Supported Algorithms
|
||||
--------------------
|
||||
|
||||
allium Garlicoin
|
||||
anime Animecoin
|
||||
argon2 Argon2 coin (AR2)
|
||||
argon2d-crds Credits (CRDS)
|
||||
argon2d-dyn Dynamic (DYN)
|
||||
axiom Shabal-256 MemoHash
|
||||
allium Garlicoin
|
||||
anime Animecoin
|
||||
argon2 Argon2 coin (AR2)
|
||||
argon2d250 argon2d-crds, Credits (CRDS)
|
||||
argon2d500 argon2d-dyn, Dynamic (DYN)
|
||||
argon2d4096 argon2d-uis, Unitus, (UIS)
|
||||
axiom Shabal-256 MemoHash
|
||||
bastion
|
||||
blake Blake-256 (SFR)
|
||||
blakecoin blake256r8
|
||||
blake2s Blake-2 S
|
||||
bmw BMW 256
|
||||
c11 Chaincoin
|
||||
cryptolight Cryptonight-light
|
||||
cryptonight cryptonote, Monero (XMR)
|
||||
blake Blake-256 (SFR)
|
||||
blakecoin blake256r8
|
||||
blake2s Blake-2 S
|
||||
bmw BMW 256
|
||||
bmw512 BMW 512
|
||||
c11 Chaincoin
|
||||
decred
|
||||
deep Deepcoin (DCN)
|
||||
dmd-gr Diamond-Groestl
|
||||
drop Dropcoin
|
||||
fresh Fresh
|
||||
groestl Groestl coin
|
||||
heavy Heavy
|
||||
hmq1725 Espers
|
||||
hodl Hodlcoin
|
||||
jha Jackpotcoin
|
||||
keccak Maxcoin
|
||||
keccakc Creative coin
|
||||
lbry LBC, LBRY Credits
|
||||
luffa Luffa
|
||||
lyra2h Hppcoin
|
||||
lyra2re lyra2
|
||||
lyra2rev2 lyra2v2, Vertcoin
|
||||
lyra2z Zcoin (XZC)
|
||||
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
|
||||
m7m Magi (XMG)
|
||||
myr-gr Myriad-Groestl
|
||||
neoscrypt NeoScrypt(128, 2, 1)
|
||||
nist5 Nist5
|
||||
pentablake Pentablake
|
||||
phi1612 phi, LUX coin
|
||||
pluck Pluck:128 (Supcoin)
|
||||
polytimos Ninja
|
||||
quark Quark
|
||||
qubit Qubit
|
||||
scrypt scrypt(1024, 1, 1) (default)
|
||||
scrypt:N scrypt(N, 1, 1)
|
||||
deep Deepcoin (DCN)
|
||||
dmd-gr Diamond-Groestl
|
||||
drop Dropcoin
|
||||
fresh Fresh
|
||||
groestl Groestl coin
|
||||
heavy Heavy
|
||||
hex x16r-hex
|
||||
hmq1725 Espers
|
||||
hodl Hodlcoin
|
||||
jha Jackpotcoin
|
||||
keccak Maxcoin
|
||||
keccakc Creative coin
|
||||
lbry LBC, LBRY Credits
|
||||
luffa Luffa
|
||||
lyra2h Hppcoin
|
||||
lyra2re lyra2
|
||||
lyra2rev2 lyra2v2
|
||||
lyra2rev3 lyrav2v3, Vertcoin
|
||||
lyra2z
|
||||
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
|
||||
m7m Magi (XMG)
|
||||
myr-gr Myriad-Groestl
|
||||
neoscrypt NeoScrypt(128, 2, 1)
|
||||
nist5 Nist5
|
||||
pentablake Pentablake
|
||||
phi1612 phi, LUX coin (original algo)
|
||||
phi2 LUX coin (new algo)
|
||||
pluck Pluck:128 (Supcoin)
|
||||
polytimos Ninja
|
||||
quark Quark
|
||||
qubit Qubit
|
||||
scrypt scrypt(1024, 1, 1) (default)
|
||||
scrypt:N scrypt(N, 1, 1)
|
||||
scryptjane:nf
|
||||
sha256d Double SHA-256
|
||||
sha256t Triple SHA-256, Onecoin (OC)
|
||||
shavite3 Shavite3
|
||||
skein Skein+Sha (Skeincoin)
|
||||
skein2 Double Skein (Woodcoin)
|
||||
skunk Signatum (SIGT)
|
||||
timetravel Machinecoin (MAC)
|
||||
timetravel10 Bitcore
|
||||
tribus Denarius (DNR)
|
||||
vanilla blake256r8vnl (VCash)
|
||||
veltor (VLT)
|
||||
sha256d Double SHA-256
|
||||
sha256q Quad SHA-256, Pyrite (PYE)
|
||||
sha256t Triple SHA-256, Onecoin (OC)
|
||||
shavite3 Shavite3
|
||||
skein Skein+Sha (Skeincoin)
|
||||
skein2 Double Skein (Woodcoin)
|
||||
skunk Signatum (SIGT)
|
||||
sonoa Sono
|
||||
timetravel Machinecoin (MAC)
|
||||
timetravel10 Bitcore
|
||||
tribus Denarius (DNR)
|
||||
vanilla blake256r8vnl (VCash)
|
||||
veltor (VLT)
|
||||
whirlpool
|
||||
whirlpoolx
|
||||
x11 Dash
|
||||
x11evo Revolvercoin
|
||||
x11gost sib (SibCoin)
|
||||
x12 Galaxie Cash (GCH)
|
||||
x13 X13
|
||||
x13sm3 hsr (Hshare)
|
||||
x14 X14
|
||||
x15 X15
|
||||
x16r Ravencoin (RVN)
|
||||
x16s pigeoncoin (PGN)
|
||||
x11 Dash
|
||||
x11evo Revolvercoin
|
||||
x11gost sib (SibCoin)
|
||||
x12 Galaxie Cash (GCH)
|
||||
x13 X13
|
||||
x13bcd bcd
|
||||
x13sm3 hsr (Hshare)
|
||||
x14 X14
|
||||
x15 X15
|
||||
x16r Ravencoin (RVN)
|
||||
x16rt Gincoin (GIN)
|
||||
x16rt_veil Veil (VEIL)
|
||||
x16s Pigeoncoin (PGN)
|
||||
x17
|
||||
xevan Bitsend (BSD)
|
||||
yescrypt Globalboost-Y (BSTY)
|
||||
yescryptr8 BitZeny (ZNY)
|
||||
yescryptr16 Yenten (YTN)
|
||||
yescryptr32 WAVI
|
||||
zr5 Ziftr
|
||||
x21s
|
||||
xevan Bitsend (BSD)
|
||||
yescrypt Globalboost-Y (BSTY)
|
||||
yescryptr8 BitZeny (ZNY)
|
||||
yescryptr16 Eli
|
||||
yescryptr32 WAVI
|
||||
yespower Cryply
|
||||
yespowerr16 Yenten (YTN)
|
||||
zr5 Ziftr
|
||||
|
||||
Errata
|
||||
------
|
||||
|
||||
Cryptonight and variants are no longer supported, use another miner.
|
||||
|
||||
Neoscrypt crashes on Windows, use legacy version.
|
||||
|
||||
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
||||
|
21
README.txt
21
README.txt
@@ -12,30 +12,27 @@ the software, don't use it.
|
||||
Choose the exe that best matches you CPU's features or use trial and
|
||||
error to find the fastest one that doesn't crash. Pay attention to
|
||||
the features listed at cpuminer startup to ensure you are mining at
|
||||
optimum speed using all the available features.
|
||||
optimum speed using the best available features.
|
||||
|
||||
Architecture names and compile options used are only provided for Intel
|
||||
Core series. Pentium and Celeron often have fewer features.
|
||||
Core series. Even the newest Pentium and Celeron CPUs are often missing
|
||||
features.
|
||||
|
||||
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
||||
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
||||
these CPUs. Some algos may crash the miner with an invalid instruction.
|
||||
Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
||||
|
||||
Exe name Compile flags Arch name
|
||||
Exe name Compile flags Arch name
|
||||
|
||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
||||
cpuminer-aes-sse42.exe "-maes -msse4.2" Westmere
|
||||
cpuminer-aes-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
|
||||
cpuminer-avx2.exe "-march=core-avx2" Haswell...
|
||||
cpuminer-avx2-sha.exe "-march=core-avx2 -msha" Ryzen
|
||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
||||
cpuminer-aes-sse42.exe "-march=westmere" Westmere
|
||||
cpuminer-avx.exe "-march=corei7-avx" Sandy-Ivybridge
|
||||
cpuminer-avx2.exe "-march=core-avx2" Haswell, Sky-Kaby-Coffeelake
|
||||
cpuminer-zen "-march=znver1" AMD Ryzen, Threadripper
|
||||
|
||||
If you like this software feel free to donate:
|
||||
|
||||
BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
|
||||
ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
|
||||
LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
|
||||
BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
|
||||
BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ
|
||||
|
||||
|
||||
|
332
RELEASE_NOTES
332
RELEASE_NOTES
@@ -1,11 +1,11 @@
|
||||
puminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
|
||||
cpuminer-opt is a console program run from the command line using the
|
||||
keyboard, not the mouse.
|
||||
|
||||
cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
|
||||
This feature requires recent SW including GCC version 5 or higher and
|
||||
openssl version 1.1 or higher. It may also require using "-march=znver1"
|
||||
compile flag.
|
||||
|
||||
HW SHA support is only available when compiled from source, Windows binaries
|
||||
are not yet available.
|
||||
|
||||
cpuminer-opt is a console program, if you're using a mouse you're doing it
|
||||
wrong.
|
||||
|
||||
@@ -25,141 +25,210 @@ required.
|
||||
Compile Instructions
|
||||
--------------------
|
||||
|
||||
Requirements:
|
||||
See INSTALL_LINUX or INSTALL_WINDOWS fror compile instruuctions
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
|
||||
supported.
|
||||
64 bit Linux or Windows operating system. Apple is not supported.
|
||||
|
||||
Building on linux prerequisites:
|
||||
|
||||
It is assumed users know how to install packages on their system and
|
||||
be able to compile standard source packages. This is basic Linux and
|
||||
beyond the scope of cpuminer-opt.
|
||||
|
||||
Make sure you have the basic development packages installed.
|
||||
Here is a good start:
|
||||
|
||||
http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu
|
||||
|
||||
Install any additional dependencies needed by cpuminer-opt. The list below
|
||||
are some of the ones that may not be in the default install and need to
|
||||
be installed manually. There may be others, read the error messages they
|
||||
will give a clue as to the missing package.
|
||||
|
||||
The following command should install everything you need on Debian based
|
||||
distributions such as Ubuntu:
|
||||
|
||||
sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev automake
|
||||
|
||||
|
||||
build-essential (for Ubuntu, Development Tools package group on Fedora)
|
||||
automake
|
||||
libjansson-dev
|
||||
libgmp-dev
|
||||
libcurl4-openssl-dev
|
||||
libssl-dev
|
||||
pthreads
|
||||
zlib
|
||||
|
||||
SHA support on AMD Ryzen CPUs requires gcc version 5 or higher and openssl 1.1
|
||||
or higher. Reports of improved performiance on Ryzen when using openssl 1.0.2
|
||||
have been due to AVX and AVX2 optimizations added to that version.
|
||||
Additional improvements are expected on Ryzen with openssl 1.1.
|
||||
"-march-znver1" or "-msha".
|
||||
|
||||
Additional instructions for static compilalation can be found here:
|
||||
https://lxadm.com/Static_compilation_of_cpuminer
|
||||
Static builds should only considered in a homogeneous HW and SW environment.
|
||||
Local builds will always have the best performance and compatibility.
|
||||
|
||||
Extract cpuminer source.
|
||||
|
||||
tar xvzf cpuminer-opt-x.y.z.tar.gz
|
||||
cd cpuminer-opt-x.y.z
|
||||
|
||||
Run ./build.sh to build on Linux or execute the following commands.
|
||||
|
||||
./autogen.sh
|
||||
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
make
|
||||
|
||||
Additional optional compile flags, add the following to CFLAGS to activate:
|
||||
|
||||
-DUSE_SPH_SHA
|
||||
|
||||
SPH may give slightly better performance on algos that use sha256 when using
|
||||
openssl 1.0.1 or older. Openssl 1.0.2 adds AVX2 and 1.1 adds SHA and perform
|
||||
better than SPH. This option is ignored when 4-way is used, even for CPUs
|
||||
with SHA.
|
||||
|
||||
Start mining.
|
||||
|
||||
./cpuminer -a algo -o url -u username -p password
|
||||
|
||||
Windows
|
||||
|
||||
Precompiled Windows binaries are built on a Linux host using Mingw
|
||||
with a more recent compiler than the following Windows hosted procedure.
|
||||
|
||||
Building on Windows prerequisites:
|
||||
|
||||
msys
|
||||
mingw_w64
|
||||
Visual C++ redistributable 2008 X64
|
||||
openssl
|
||||
|
||||
Install msys and mingw_w64, only needed once.
|
||||
|
||||
Unpack msys into C:\msys or your preferred directory.
|
||||
|
||||
Install mingw_w64 from win-builds.
|
||||
Follow instructions, check "msys or cygwin" and "x86_64" and accept default
|
||||
existing msys instalation.
|
||||
|
||||
Open a msys shell by double clicking on msys.bat.
|
||||
Note that msys shell uses linux syntax for file specifications, "C:\" is
|
||||
mounted at "/c/".
|
||||
|
||||
Add mingw bin directory to PATH variable
|
||||
PATH="/c/msys/opt/windows_64/bin/:$PATH"
|
||||
|
||||
Instalation complete, compile cpuminer-opt.
|
||||
|
||||
Unpack cpuminer-opt source files using tar from msys shell, or using 7zip
|
||||
or similar Windows program.
|
||||
|
||||
In msys shell cd to miner directory.
|
||||
cd /c/path/to/cpuminer-opt
|
||||
|
||||
Run build.sh to build on Windows or execute the following commands.
|
||||
|
||||
./autogen.sh
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
make
|
||||
|
||||
Start mining
|
||||
|
||||
cpuminer.exe -a algo -o url -u user -p password
|
||||
|
||||
The following tips may be useful for older AMD CPUs.
|
||||
|
||||
AMD CPUs older than Steamroller, including Athlon x2 and Phenom II x4, are
|
||||
not supported by cpuminer-opt due to an incompatible implementation of SSE2
|
||||
on these CPUs. Some algos may crash the miner with an invalid instruction.
|
||||
Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
||||
|
||||
Some users with AMD CPUs without AES_NI have reported problems compiling
|
||||
with build.sh or "-march=native". Problems have included compile errors
|
||||
and poor performance. These users are recommended to compile manually
|
||||
specifying "-march=btver1" on the configure command line.
|
||||
|
||||
Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
||||
|
||||
64 bit Linux or Windows operating system. Apple and Android are not supported.
|
||||
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.9.6.1
|
||||
|
||||
New algos: x21s, hex (alias x16r-hex).
|
||||
|
||||
v3.9.6
|
||||
|
||||
New algos: bmw512, x16rt, x16rt-veil (alias veil), x13bcd (alias bcd).
|
||||
|
||||
v3.9.5.4
|
||||
|
||||
Fixed sha256q AVX2 poor performance.
|
||||
Fixed skein2 buffer overflow and restored bswap-interleave optimization.
|
||||
More restructuring.
|
||||
|
||||
v3.9.5.3
|
||||
|
||||
Fix crash mining hodl with aes-sse42.
|
||||
More restructuring and share report tweaks.
|
||||
|
||||
v3.9.5.2
|
||||
|
||||
Revert bswap-interleave optimization for causing crashes on Windows.
|
||||
|
||||
v3.9.5.1
|
||||
|
||||
Fixed skein2 crash on Windows.
|
||||
|
||||
Fixed CPU temperature reading on Ubuntu 19.04.
|
||||
|
||||
Realigned log message colours, blue is used to report normal activity and
|
||||
yellow is only used to report abnormal activity.
|
||||
|
||||
Changed stats colours, yellow now means below average, white is average
|
||||
range. Tweaked colour thresholds.
|
||||
|
||||
Changed colour of stratum difficulty change messages to blue to match other
|
||||
normal protocol messages. Blue messages (block, stratum, submit) will no
|
||||
longer be displayed when using -q option.
|
||||
|
||||
Added job id to new block, share submit, and share result messages and added
|
||||
new nessage when a new job is received for an existing block. This will for
|
||||
better troubleshooting of invalid job id rejects seen at zergpool.
|
||||
|
||||
Some more restructuring.
|
||||
|
||||
v3.9.5
|
||||
|
||||
New share reporting information includes calculation of equivalent hashrate
|
||||
based on share difficulty, network latency, 5 minute summary.
|
||||
Per-thread hash rate reports are disabled by default.
|
||||
New command line option --hash-meter added to enable per-thread hash rates.
|
||||
|
||||
|
||||
v3.9.4
|
||||
|
||||
Faster AVX2 for lyra2v3, quark, anime.
|
||||
Fixed skein AVX2 regression (invalid shares since v3.9.0) and faster.
|
||||
Faster skein2 with 4way AVX2 enabled.
|
||||
Automatic SHA override on Ryzen CPUs, no need for -DRYZEN compile flag.
|
||||
Ongoing restructuring.
|
||||
|
||||
v3.9.3.1
|
||||
|
||||
Skipped v3.9.3 due to misidentification of v3.9.2.5 as v3.9.3.
|
||||
Fixed x16r algo 25% invalid share reject rate. The bug may have also
|
||||
affected other algos.
|
||||
|
||||
v3.9.2.5
|
||||
|
||||
Fixed 2 regressions: hodl AES detection, x16r invalid shares with AVX2.
|
||||
More restructuring.
|
||||
|
||||
v3.9.2.4
|
||||
|
||||
Yet another affinity fix. Hopefully the last one.
|
||||
|
||||
v3.9.2.3
|
||||
|
||||
Another cpu-affinity fix.
|
||||
Disabled test code that fails to compile on some CPUs with limited
|
||||
AVX512 capabilities.
|
||||
|
||||
v3.9.2.2
|
||||
|
||||
Fixed some day one cpu-affinity issues.
|
||||
|
||||
v3.9.2
|
||||
|
||||
Added sha256q algo.
|
||||
Yespower now uses openssl SHA256, but no observable hash rate increase
|
||||
on Ryzen.
|
||||
Ongoing rearchitecting.
|
||||
Lyra2z now hashes 8-way on CPUs with AVX2.
|
||||
Lyra2 (all including phi2) now runs optimized code with SSE2.
|
||||
|
||||
v3.9.1.1
|
||||
|
||||
Fixed lyra2v3 AVX and below.
|
||||
|
||||
Compiling on Windows using Cygwin now works. Simply use "./build.sh"
|
||||
just like on Linux. It isn't portable therefore the binaries package will
|
||||
continue to use the existing procedure.
|
||||
The Cygwin procedure will be documented in more detail later and will
|
||||
include a list of packages that need to be installed.
|
||||
|
||||
v3.9.1
|
||||
|
||||
Fixed AVX2 version of anime algo.
|
||||
|
||||
Added sonoa algo.
|
||||
|
||||
Added "-DRYZEN_" compile option for Ryzen to override 4-way hashing when algo
|
||||
contains sha256 and use SHA instead. This is due to a combination of
|
||||
the introduction of HW SHA support combined with the poor performance
|
||||
of AVX2 on Ryzen. The Windows binaries package replaces cpuminer-avx2-sha
|
||||
with cpuminer-zen compiled with the override. Refer to the build instructions
|
||||
for more information.
|
||||
|
||||
Ongoing restructuring to streamline the process, reduce latency,
|
||||
reduce memory usage and unnecessary copying of data. Most of these
|
||||
will not result in a notoceably higher reported hashrate as the
|
||||
change simply reduces the time wasted that wasn't factored into the
|
||||
hash rate reported by the miner. In short, less dead time resulting in
|
||||
a higher net hashrate.
|
||||
|
||||
One of these measures to reduce latency also results in an enhanced
|
||||
share submission message including the share number*, the CPU thread,
|
||||
and the vector lane that found the solution. The time difference between
|
||||
the share submission and acceptance (or rejection) response indicates
|
||||
network ltatency. One other effect of this change is a reduction in hash
|
||||
meter messages because the scan function no longer exits when a share is
|
||||
found. Scan cycles will go longer and submit multiple shares per cycle.
|
||||
*the share number is antcipated and includes both accepted and rejected
|
||||
shares. Because the share is antipated and not synchronized it may be
|
||||
incorrect in time of very rapid share submission. Under most conditions
|
||||
it should be easy to match the submission with the corresponding response.
|
||||
|
||||
Removed "-DUSE_SPH_SHA" option, all users should have a recent version of
|
||||
openssl installed: v1.0.2 (Ubuntu 16.04) or better. Ryzen SHA requires
|
||||
v1.1.0 or better. Ryzen SHA is not used when hashing multi-way parallel.
|
||||
Ryzen SHA is available in the Windows binaries release package.
|
||||
|
||||
Improved compile instructions, now in seperate files: INSTALL_LINUX and
|
||||
INSTALL_WINDOWS. The Windows instructions are used to build the binaries
|
||||
release package. It's built on a Linux system either running as a virtual
|
||||
machine or a seperate computer. At this time there is no known way to
|
||||
build natively on a Windows system.
|
||||
|
||||
v3.9.0.1
|
||||
|
||||
Isolate Windows CPU groups code when CPU groups support not explicitly defined.
|
||||
|
||||
v3.9.0
|
||||
|
||||
Added support for Windows CPU groups.
|
||||
Fixed BIP34 coinbase height.
|
||||
Prep work for AVX512.
|
||||
Added lyra2rev3 for the vertcoin algo change.
|
||||
Added yespower, yespowerr16 (Yenten)
|
||||
Added phi2 algo for LUX
|
||||
Discontinued support for cryptonight and variants.
|
||||
|
||||
v3.8.8.1
|
||||
|
||||
Fixed x16r.
|
||||
Removed cryptonight variant check due to false positives.
|
||||
API displays hashrate before shares are submitted.
|
||||
|
||||
v3.8.8
|
||||
|
||||
Added cryptonightv7 for Monero.
|
||||
|
||||
v3.8.7.2
|
||||
|
||||
Fixed argon2d-dyn regression in v3.8.7.1.
|
||||
Changed compile options for aes-sse42 Windows build to -march=westmere
|
||||
|
||||
v3.8.7.1
|
||||
|
||||
Fixed argon2d-uis low difficulty rejects.
|
||||
Fixed argon2d aliases.
|
||||
|
||||
v3.8.7
|
||||
|
||||
Added argon2d4096 (alias argon2d-uis) for Unitus (UIS).
|
||||
argon2d-crds and argon2d-dyn renamed to argon2d250 and argon2d500 respectively.
|
||||
The old names are recognized as aliases.
|
||||
AVX512 is now supported for argon2d algos, Linux only.
|
||||
AVX is no longer a reported feature and an AVX Windows binary is no longer
|
||||
provided. Use AES-SSE42 build instead.
|
||||
|
||||
v3.8.6.1
|
||||
|
||||
Faster argon2d* AVX2.
|
||||
@@ -313,6 +382,7 @@ Changed default sha256 and sha512 to openssl. This should be used when
|
||||
compiling with openssl 1.0.2 or higher (Ubuntu 16.04).
|
||||
This should increase the hashrate for yescrypt, yescryptr16, m7m, xevan, skein,
|
||||
myr-gr & others when openssl 1.0.2 is installed.
|
||||
Note: -DUSE_SPH_SHA has been removed in v3.9.1.
|
||||
Users with openssl 1.0.1 (Ubuntu 14.04) may get better perforance by adding
|
||||
"-DUSE_SPH_SHA" to CLAGS.
|
||||
Windows binaries are compiled with -DUSE_SPH_SHA and won't get the speedup.
|
||||
|
69
aclocal.m4
vendored
69
aclocal.m4
vendored
@@ -1,6 +1,6 @@
|
||||
# generated automatically by aclocal 1.14.1 -*- Autoconf -*-
|
||||
# generated automatically by aclocal 1.15.1 -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to.
|
||||
If you have problems, you may need to regenerate the build system entirely.
|
||||
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
|
||||
|
||||
# Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2002-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -32,10 +32,10 @@ To do so, use the procedure documented by the package, typically 'autoreconf'.])
|
||||
# generated from the m4 files accompanying Automake X.Y.
|
||||
# (This private macro should not be called outside this file.)
|
||||
AC_DEFUN([AM_AUTOMAKE_VERSION],
|
||||
[am__api_version='1.14'
|
||||
[am__api_version='1.15'
|
||||
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
||||
dnl require some minimum version. Point them to the right macro.
|
||||
m4_if([$1], [1.14.1], [],
|
||||
m4_if([$1], [1.15.1], [],
|
||||
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
||||
])
|
||||
|
||||
@@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
|
||||
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
|
||||
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
|
||||
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
||||
[AM_AUTOMAKE_VERSION([1.14.1])dnl
|
||||
[AM_AUTOMAKE_VERSION([1.15.1])dnl
|
||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
|
||||
|
||||
# Figure out how to run the assembler. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -78,7 +78,7 @@ _AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl
|
||||
|
||||
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -123,15 +123,14 @@ _AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl
|
||||
# configured tree to be moved without reconfiguration.
|
||||
|
||||
AC_DEFUN([AM_AUX_DIR_EXPAND],
|
||||
[dnl Rely on autoconf to set up CDPATH properly.
|
||||
AC_PREREQ([2.50])dnl
|
||||
# expand $ac_aux_dir to an absolute path
|
||||
am_aux_dir=`cd $ac_aux_dir && pwd`
|
||||
[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
|
||||
# Expand $ac_aux_dir to an absolute path.
|
||||
am_aux_dir=`cd "$ac_aux_dir" && pwd`
|
||||
])
|
||||
|
||||
# AM_CONDITIONAL -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1997-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -162,7 +161,7 @@ AC_CONFIG_COMMANDS_PRE(
|
||||
Usually this means the macro was only invoked conditionally.]])
|
||||
fi])])
|
||||
|
||||
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -353,7 +352,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
|
||||
|
||||
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -429,7 +428,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
|
||||
# Do all the work for Automake. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -519,8 +518,8 @@ AC_REQUIRE([AC_PROG_MKDIR_P])dnl
|
||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
|
||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
||||
AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
|
||||
# We need awk for the "check" target. The system "awk" is bad on
|
||||
# some platforms.
|
||||
# We need awk for the "check" target (and possibly the TAP driver). The
|
||||
# system "awk" is bad on some platforms.
|
||||
AC_REQUIRE([AC_PROG_AWK])dnl
|
||||
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
|
||||
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
||||
@@ -593,7 +592,11 @@ to "yes", and re-run configure.
|
||||
END
|
||||
AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
|
||||
fi
|
||||
fi])
|
||||
fi
|
||||
dnl The trailing newline in this macro's definition is deliberate, for
|
||||
dnl backward compatibility and to allow trailing 'dnl'-style comments
|
||||
dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841.
|
||||
])
|
||||
|
||||
dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
|
||||
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
|
||||
@@ -622,7 +625,7 @@ for _am_header in $config_headers :; do
|
||||
done
|
||||
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -633,7 +636,7 @@ echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_co
|
||||
# Define $install_sh.
|
||||
AC_DEFUN([AM_PROG_INSTALL_SH],
|
||||
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||
if test x"${install_sh}" != xset; then
|
||||
if test x"${install_sh+set}" != xset; then
|
||||
case $am_aux_dir in
|
||||
*\ * | *\ *)
|
||||
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
|
||||
@@ -643,7 +646,7 @@ if test x"${install_sh}" != xset; then
|
||||
fi
|
||||
AC_SUBST([install_sh])])
|
||||
|
||||
# Copyright (C) 2003-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2003-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -665,7 +668,7 @@ AC_SUBST([am__leading_dot])])
|
||||
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
|
||||
# From Jim Meyering
|
||||
|
||||
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -700,7 +703,7 @@ AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
|
||||
|
||||
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -750,7 +753,7 @@ rm -f confinc confmf
|
||||
|
||||
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1997-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -789,7 +792,7 @@ fi
|
||||
|
||||
# Helper functions for option handling. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -818,7 +821,7 @@ AC_DEFUN([_AM_SET_OPTIONS],
|
||||
AC_DEFUN([_AM_IF_OPTION],
|
||||
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
|
||||
|
||||
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -865,7 +868,7 @@ AC_LANG_POP([C])])
|
||||
# For backward compatibility.
|
||||
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -884,7 +887,7 @@ AC_DEFUN([AM_RUN_LOG],
|
||||
|
||||
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -965,7 +968,7 @@ AC_CONFIG_COMMANDS_PRE(
|
||||
rm -f conftest.file
|
||||
])
|
||||
|
||||
# Copyright (C) 2009-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2009-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1025,7 +1028,7 @@ AC_SUBST([AM_BACKSLASH])dnl
|
||||
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
|
||||
])
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1053,7 +1056,7 @@ fi
|
||||
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
||||
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
||||
|
||||
# Copyright (C) 2006-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2006-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1072,7 +1075,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
|
||||
|
||||
# Check how to create a tarball. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2004-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2004-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
|
264
algo-gate-api.c
264
algo-gate-api.c
@@ -69,6 +69,7 @@ void do_nothing () {}
|
||||
bool return_true () { return true; }
|
||||
bool return_false () { return false; }
|
||||
void *return_null () { return NULL; }
|
||||
void call_error () { printf("ERR: Uninitialized function pointer\n"); }
|
||||
|
||||
void algo_not_tested()
|
||||
{
|
||||
@@ -113,7 +114,8 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->hash_suw = (void*)&null_hash_suw;
|
||||
gate->get_new_work = (void*)&std_get_new_work;
|
||||
gate->get_nonceptr = (void*)&std_get_nonceptr;
|
||||
gate->display_extra_data = (void*)&do_nothing;
|
||||
gate->work_decode = (void*)&std_le_work_decode;
|
||||
gate->decode_extra_data = (void*)&do_nothing;
|
||||
gate->wait_for_diff = (void*)&std_wait_for_diff;
|
||||
gate->get_max64 = (void*)&get_max64_0x1fffffLL;
|
||||
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
|
||||
@@ -121,7 +123,6 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
|
||||
gate->malloc_txs_request = (void*)&std_malloc_txs_request;
|
||||
gate->set_target = (void*)&std_set_target;
|
||||
gate->work_decode = (void*)&std_le_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
|
||||
gate->build_block_header = (void*)&std_build_block_header;
|
||||
gate->build_extraheader = (void*)&std_build_extraheader;
|
||||
@@ -132,11 +133,11 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->do_this_thread = (void*)&return_true;
|
||||
gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call;
|
||||
gate->stratum_handle_response = (void*)&std_stratum_handle_response;
|
||||
gate->get_work_data_size = (void*)&std_get_work_data_size;
|
||||
gate->optimizations = EMPTY_SET;
|
||||
gate->ntime_index = STD_NTIME_INDEX;
|
||||
gate->nbits_index = STD_NBITS_INDEX;
|
||||
gate->nonce_index = STD_NONCE_INDEX;
|
||||
gate->work_data_size = STD_WORK_DATA_SIZE;
|
||||
gate->work_cmp_size = STD_WORK_CMP_SIZE;
|
||||
}
|
||||
|
||||
@@ -147,97 +148,116 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
// called by each thread that uses the gate
|
||||
bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
{
|
||||
if ( NULL == gate )
|
||||
{
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, NULL gate\n");
|
||||
return false;
|
||||
}
|
||||
if ( NULL == gate )
|
||||
{
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, NULL gate\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
init_algo_gate( gate );
|
||||
init_algo_gate( gate );
|
||||
|
||||
switch (algo)
|
||||
{
|
||||
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
|
||||
case ALGO_ANIME: register_anime_algo ( gate ); break;
|
||||
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
|
||||
case ALGO_ARGON2DCRDS: register_argon2d_crds_algo( gate ); break;
|
||||
case ALGO_ARGON2DDYN: register_argon2d_dyn_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
|
||||
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
|
||||
case ALGO_C11: register_c11_algo ( gate ); break;
|
||||
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
|
||||
case ALGO_DECRED: register_decred_algo ( gate ); break;
|
||||
case ALGO_DEEP: register_deep_algo ( gate ); break;
|
||||
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
|
||||
case ALGO_DROP: register_drop_algo ( gate ); break;
|
||||
case ALGO_FRESH: register_fresh_algo ( gate ); break;
|
||||
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
|
||||
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
||||
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||
case ALGO_JHA: register_jha_algo ( gate ); break;
|
||||
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
||||
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
|
||||
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
||||
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
||||
case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break;
|
||||
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
||||
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
||||
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
||||
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
|
||||
case ALGO_NIST5: register_nist5_algo ( gate ); break;
|
||||
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
|
||||
case ALGO_PHI1612: register_phi1612_algo ( gate ); break;
|
||||
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
|
||||
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
|
||||
case ALGO_QUARK: register_quark_algo ( gate ); break;
|
||||
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
|
||||
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
|
||||
case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
|
||||
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
|
||||
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
|
||||
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
|
||||
case ALGO_SKEIN: register_skein_algo ( gate ); break;
|
||||
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
|
||||
case ALGO_SKUNK: register_skunk_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL10: register_timetravel10_algo( gate ); break;
|
||||
case ALGO_TRIBUS: register_tribus_algo ( gate ); break;
|
||||
case ALGO_VANILLA: register_vanilla_algo ( gate ); break;
|
||||
case ALGO_VELTOR: register_veltor_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break;
|
||||
case ALGO_X11: register_x11_algo ( gate ); break;
|
||||
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
|
||||
case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
|
||||
case ALGO_X12: register_x12_algo ( gate ); break;
|
||||
case ALGO_X13: register_x13_algo ( gate ); break;
|
||||
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
|
||||
case ALGO_X14: register_x14_algo ( gate ); break;
|
||||
case ALGO_X15: register_x15_algo ( gate ); break;
|
||||
case ALGO_X16R: register_x16r_algo ( gate ); break;
|
||||
case ALGO_X16S: register_x16s_algo ( gate ); break;
|
||||
case ALGO_X17: register_x17_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
|
||||
case ALGO_ZR5: register_zr5_algo ( gate ); break;
|
||||
default:
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
|
||||
return false;
|
||||
} // switch
|
||||
switch (algo)
|
||||
{
|
||||
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
|
||||
case ALGO_ANIME: register_anime_algo ( gate ); break;
|
||||
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
|
||||
case ALGO_ARGON2D250: register_argon2d_crds_algo ( gate ); break;
|
||||
case ALGO_ARGON2D500: register_argon2d_dyn_algo ( gate ); break;
|
||||
case ALGO_ARGON2D4096: register_argon2d4096_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
|
||||
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
|
||||
case ALGO_BMW512: register_bmw512_algo ( gate ); break;
|
||||
case ALGO_C11: register_c11_algo ( gate ); break;
|
||||
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHTV7: register_cryptonightv7_algo ( gate ); break;
|
||||
case ALGO_DECRED: register_decred_algo ( gate ); break;
|
||||
case ALGO_DEEP: register_deep_algo ( gate ); break;
|
||||
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
|
||||
case ALGO_DROP: register_drop_algo ( gate ); break;
|
||||
case ALGO_FRESH: register_fresh_algo ( gate ); break;
|
||||
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
|
||||
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
||||
case ALGO_HEX: register_hex_algo ( gate ); break;
|
||||
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||
case ALGO_JHA: register_jha_algo ( gate ); break;
|
||||
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
||||
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
|
||||
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
||||
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
||||
case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break;
|
||||
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV3: register_lyra2rev3_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
||||
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
||||
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
||||
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
|
||||
case ALGO_NIST5: register_nist5_algo ( gate ); break;
|
||||
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
|
||||
case ALGO_PHI1612: register_phi1612_algo ( gate ); break;
|
||||
case ALGO_PHI2: register_phi2_algo ( gate ); break;
|
||||
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
|
||||
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
|
||||
case ALGO_QUARK: register_quark_algo ( gate ); break;
|
||||
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
|
||||
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
|
||||
case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
|
||||
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
|
||||
case ALGO_SHA256Q: register_sha256q_algo ( gate ); break;
|
||||
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
|
||||
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
|
||||
case ALGO_SKEIN: register_skein_algo ( gate ); break;
|
||||
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
|
||||
case ALGO_SKUNK: register_skunk_algo ( gate ); break;
|
||||
case ALGO_SONOA: register_sonoa_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL10: register_timetravel10_algo ( gate ); break;
|
||||
case ALGO_TRIBUS: register_tribus_algo ( gate ); break;
|
||||
case ALGO_VANILLA: register_vanilla_algo ( gate ); break;
|
||||
case ALGO_VELTOR: register_veltor_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break;
|
||||
case ALGO_X11: register_x11_algo ( gate ); break;
|
||||
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
|
||||
case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
|
||||
case ALGO_X12: register_x12_algo ( gate ); break;
|
||||
case ALGO_X13: register_x13_algo ( gate ); break;
|
||||
case ALGO_X13BCD: register_x13bcd_algo ( gate ); break;
|
||||
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
|
||||
case ALGO_X14: register_x14_algo ( gate ); break;
|
||||
case ALGO_X15: register_x15_algo ( gate ); break;
|
||||
case ALGO_X16R: register_x16r_algo ( gate ); break;
|
||||
case ALGO_X16RT: register_x16rt_algo ( gate ); break;
|
||||
case ALGO_X16RT_VEIL: register_x16rt_veil_algo ( gate ); break;
|
||||
case ALGO_X16S: register_x16s_algo ( gate ); break;
|
||||
case ALGO_X17: register_x17_algo ( gate ); break;
|
||||
case ALGO_X21S: register_x21s_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
/* case ALGO_YESCRYPT: register_yescrypt_05_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_05_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_05_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_05_algo ( gate ); break;
|
||||
*/
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
|
||||
case ALGO_YESPOWER: register_yespower_algo ( gate ); break;
|
||||
case ALGO_YESPOWERR16: register_yespowerr16_algo ( gate ); break;
|
||||
case ALGO_ZR5: register_zr5_algo ( gate ); break;
|
||||
default:
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
|
||||
return false;
|
||||
} // switch
|
||||
|
||||
// ensure required functions were defined.
|
||||
// ensure required functions were defined.
|
||||
if ( gate->scanhash == (void*)&null_scanhash )
|
||||
{
|
||||
applog(LOG_ERR, "FAIL: Required algo_gate functions undefined\n");
|
||||
@@ -252,6 +272,10 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
// override std defaults with jr2 defaults
|
||||
bool register_json_rpc2( algo_gate_t *gate )
|
||||
{
|
||||
applog(LOG_WARNING,"\nCryptonight algorithm and variants are no longer");
|
||||
applog(LOG_WARNING,"supported by cpuminer-opt. Shares submitted will");
|
||||
applog(LOG_WARNING,"likely be rejected. Proceed at your own risk.\n");
|
||||
|
||||
gate->wait_for_diff = (void*)&do_nothing;
|
||||
gate->get_new_work = (void*)&jr2_get_new_work;
|
||||
gate->get_nonceptr = (void*)&jr2_get_nonceptr;
|
||||
@@ -288,6 +312,9 @@ void exec_hash_function( int algo, void *output, const void *pdata )
|
||||
const char* const algo_alias_map[][2] =
|
||||
{
|
||||
// alias proper
|
||||
{ "argon2d-crds", "argon2d250" },
|
||||
{ "argon2d-dyn", "argon2d500" },
|
||||
{ "argon2d-uis", "argon2d4096" },
|
||||
{ "bitcore", "timetravel10" },
|
||||
{ "bitzeny", "yescryptr8" },
|
||||
{ "blake256r8", "blakecoin" },
|
||||
@@ -305,7 +332,7 @@ const char* const algo_alias_map[][2] =
|
||||
{ "jane", "scryptjane" },
|
||||
{ "lyra2", "lyra2re" },
|
||||
{ "lyra2v2", "lyra2rev2" },
|
||||
{ "lyra2zoin", "lyra2z330" },
|
||||
{ "lyra2v3", "lyra2rev3" },
|
||||
{ "myrgr", "myr-gr" },
|
||||
{ "myriad", "myr-gr" },
|
||||
{ "neo", "neoscrypt" },
|
||||
@@ -313,17 +340,16 @@ const char* const algo_alias_map[][2] =
|
||||
// { "sia", "blake2b" },
|
||||
{ "sib", "x11gost" },
|
||||
{ "timetravel8", "timetravel" },
|
||||
{ "ziftr", "zr5" },
|
||||
{ "veil", "x16rt-veil" },
|
||||
{ "x16r-hex", "hex" },
|
||||
{ "yenten", "yescryptr16" },
|
||||
{ "yescryptr8k", "yescrypt" },
|
||||
{ "zcoin", "lyra2z" },
|
||||
{ "zoin", "lyra2z330" },
|
||||
{ "ziftr", "zr5" },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
|
||||
// if arg is a valid alias for a known algo it is updated with the proper name.
|
||||
// No validation of the algo or alias is done, It is the responsinility of the
|
||||
// calling function to validate the algo after return.
|
||||
// if arg is a valid alias for a known algo it is updated with the proper
|
||||
// name. No validation of the algo or alias is done, It is the responsinility
|
||||
// of the calling function to validate the algo after return.
|
||||
void get_algo_alias( char** algo_or_alias )
|
||||
{
|
||||
int i;
|
||||
@@ -336,3 +362,43 @@ void get_algo_alias( char** algo_or_alias )
|
||||
}
|
||||
}
|
||||
|
||||
#undef ALIAS
|
||||
#undef PROPER
|
||||
|
||||
bool submit_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
if ( submit_work( thr, work ) )
|
||||
{
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_BLUE, "Share %d submitted by thread %d, job %s.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr->id, work->job_id );
|
||||
return true;
|
||||
}
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
return false;
|
||||
}
|
||||
|
||||
bool submit_lane_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr, int lane )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
if ( submit_work( thr, work ) )
|
||||
{
|
||||
if ( !opt_quiet )
|
||||
// applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d.",
|
||||
// accepted_share_count + rejected_share_count + 1,
|
||||
// thr->id, lane );
|
||||
applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d, job %s.",
|
||||
accepted_share_count + rejected_share_count + 1, thr->id,
|
||||
lane, work->job_id );
|
||||
return true;
|
||||
}
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@@ -2,6 +2,7 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "miner.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
/////////////////////////////
|
||||
////
|
||||
@@ -91,6 +92,7 @@ typedef uint32_t set_t;
|
||||
#define AVX_OPT 8
|
||||
#define AVX2_OPT 0x10
|
||||
#define SHA_OPT 0x20
|
||||
#define AVX512_OPT 0x40
|
||||
|
||||
// return set containing all elements from sets a & b
|
||||
inline set_t set_union ( set_t a, set_t b ) { return a | b; }
|
||||
@@ -106,8 +108,15 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// special case, only one target, provides a callback for scanhash to
|
||||
// submit work with less overhead.
|
||||
// bool (*submit_work ) ( struct thr_info*, const struct work* );
|
||||
|
||||
// mandatory functions, must be overwritten
|
||||
int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t* );
|
||||
// Added a 5th arg for the thread_info structure to replace the int thr id
|
||||
// in the first arg. Both will co-exist during the trasition.
|
||||
//int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t* );
|
||||
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
|
||||
|
||||
// optional unsafe, must be overwritten if algo uses function
|
||||
void ( *hash ) ( void*, const void*, uint32_t ) ;
|
||||
@@ -119,7 +128,7 @@ void ( *stratum_gen_work ) ( struct stratum_ctx*, struct work* );
|
||||
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t*,
|
||||
bool );
|
||||
uint32_t *( *get_nonceptr ) ( uint32_t* );
|
||||
void ( *display_extra_data ) ( struct work*, uint64_t* );
|
||||
void ( *decode_extra_data ) ( struct work*, uint64_t* );
|
||||
void ( *wait_for_diff ) ( struct stratum_ctx* );
|
||||
int64_t ( *get_max64 ) ();
|
||||
bool ( *work_decode ) ( const json_t*, struct work* );
|
||||
@@ -128,7 +137,7 @@ bool ( *submit_getwork_result ) ( CURL*, struct work* );
|
||||
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
|
||||
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
|
||||
void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
|
||||
uint32_t*, uint32_t, uint32_t );
|
||||
uint32_t*, uint32_t, uint32_t );
|
||||
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
|
||||
char* ( *malloc_txs_request ) ( struct work* );
|
||||
void ( *set_work_data_endian ) ( struct work* );
|
||||
@@ -139,12 +148,11 @@ bool ( *do_this_thread ) ( int );
|
||||
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
|
||||
bool ( *stratum_handle_response )( json_t* );
|
||||
set_t optimizations;
|
||||
int ( *get_work_data_size ) ();
|
||||
int ntime_index;
|
||||
int nbits_index;
|
||||
int nonce_index; // use with caution, see warning below
|
||||
int work_data_size;
|
||||
int work_cmp_size;
|
||||
|
||||
} algo_gate_t;
|
||||
|
||||
extern algo_gate_t algo_gate;
|
||||
@@ -185,6 +193,15 @@ void four_way_not_tested();
|
||||
// allways returns failure
|
||||
int null_scanhash();
|
||||
|
||||
// Allow algos to submit from scanhash loop.
|
||||
bool submit_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr );
|
||||
bool submit_lane_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr, int lane );
|
||||
|
||||
|
||||
bool submit_work( struct thr_info *thr, const struct work *work_in );
|
||||
|
||||
// displays warning
|
||||
void null_hash ();
|
||||
void null_hash_suw();
|
||||
@@ -239,8 +256,8 @@ void set_work_data_big_endian( struct work *work );
|
||||
double std_calc_network_diff( struct work *work );
|
||||
|
||||
void std_build_block_header( struct work* g_work, uint32_t version,
|
||||
uint32_t *prevhash, uint32_t *merkle_root,
|
||||
uint32_t ntime, uint32_t nbits );
|
||||
uint32_t *prevhash, uint32_t *merkle_root,
|
||||
uint32_t ntime, uint32_t nbits );
|
||||
|
||||
void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
|
||||
|
||||
@@ -253,6 +270,8 @@ bool jr2_stratum_handle_response( json_t *val );
|
||||
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id );
|
||||
|
||||
int std_get_work_data_size();
|
||||
|
||||
// Gate admin functions
|
||||
|
||||
// Called from main to initialize all gate functions and algo-specific data
|
||||
|
@@ -42,12 +42,14 @@ void argon2hash(void *output, const void *input)
|
||||
(unsigned char *)output);
|
||||
}
|
||||
|
||||
int scanhash_argon2(int thr_id, struct work* work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_argon2( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
@@ -28,41 +28,39 @@ void argon2d_crds_hash( void *output, const void *input )
|
||||
context.lanes = 4; // Degree of Parallelism
|
||||
context.threads = 1; // Threads
|
||||
context.t_cost = 1; // Iterations
|
||||
context.version = ARGON2_VERSION_10;
|
||||
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
int scanhash_argon2d_crds( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint32_t nonce = first_nonce;
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_crds_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_crds_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio(work, hash);
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_argon2d_crds_algo( algo_gate_t* gate )
|
||||
@@ -70,7 +68,8 @@ bool register_argon2d_crds_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_argon2d_crds;
|
||||
gate->hash = (void*)&argon2d_crds_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Dynamic
|
||||
@@ -96,41 +95,40 @@ void argon2d_dyn_hash( void *output, const void *input )
|
||||
context.lanes = 8; // Degree of Parallelism
|
||||
context.threads = 1; // Threads
|
||||
context.t_cost = 2; // Iterations
|
||||
context.version = ARGON2_VERSION_10;
|
||||
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
int scanhash_argon2d_dyn( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint32_t nonce = first_nonce;
|
||||
do
|
||||
{
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_dyn_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
argon2d_dyn_hash( hash, endiandata );
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio(work, hash);
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_argon2d_dyn_algo( algo_gate_t* gate )
|
||||
@@ -138,6 +136,57 @@ bool register_argon2d_dyn_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_argon2d_dyn;
|
||||
gate->hash = (void*)&argon2d_dyn_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Unitus
|
||||
|
||||
int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t t_cost = 1; // 1 iteration
|
||||
uint32_t m_cost = 4096; // use 4MB
|
||||
uint32_t parallelism = 1; // 1 thread, 2 lanes
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], n );
|
||||
argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) endiandata, 80,
|
||||
(char*) endiandata, 80, (char*) vhash, 32, ARGON2_VERSION_13 );
|
||||
if ( vhash[7] < Htarg && fulltest( vhash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, vhash, mythr );
|
||||
}
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t get_max64_0x1ff() { return 0x1ff; }
|
||||
|
||||
bool register_argon2d4096_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d4096;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0x1ff;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -4,22 +4,28 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
// Credits
|
||||
// Credits: version = 0x10, m_cost = 250.
|
||||
bool register_argon2d_crds_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d_crds_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_argon2d_crds( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
// Dynamic
|
||||
// Dynamic: version = 0x10, m_cost = 500.
|
||||
bool register_argon2d_dyn_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d_dyn_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_argon2d_dyn( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
|
||||
// Unitus: version = 0x13, m_cost = 4096.
|
||||
bool register_argon2d4096_algo( algo_gate_t* gate );
|
||||
|
||||
int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -180,60 +180,65 @@ int argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, const size_t hashlen,
|
||||
char *encoded, const size_t encodedlen) {
|
||||
char *encoded, const size_t encodedlen,
|
||||
const uint32_t version) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_i,
|
||||
ARGON2_VERSION_NUMBER);
|
||||
version );
|
||||
}
|
||||
|
||||
int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
const size_t saltlen, void *hash, const size_t hashlen,
|
||||
const uint32_t version ) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, NULL, 0, Argon2_i, ARGON2_VERSION_NUMBER);
|
||||
hash, hashlen, NULL, 0, Argon2_i, version );
|
||||
}
|
||||
|
||||
int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, const size_t hashlen,
|
||||
char *encoded, const size_t encodedlen) {
|
||||
char *encoded, const size_t encodedlen,
|
||||
const uint32_t version ) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_d,
|
||||
ARGON2_VERSION_NUMBER);
|
||||
version );
|
||||
}
|
||||
|
||||
int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
const size_t saltlen, void *hash, const size_t hashlen,
|
||||
const uint32_t version ) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, NULL, 0, Argon2_d, ARGON2_VERSION_NUMBER);
|
||||
hash, hashlen, NULL, 0, Argon2_d, version );
|
||||
}
|
||||
|
||||
int argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, const size_t hashlen,
|
||||
char *encoded, const size_t encodedlen) {
|
||||
char *encoded, const size_t encodedlen,
|
||||
const uint32_t version ) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_id,
|
||||
ARGON2_VERSION_NUMBER);
|
||||
version);
|
||||
}
|
||||
|
||||
int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
const size_t saltlen, void *hash, const size_t hashlen,
|
||||
const uint32_t version ) {
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, NULL, 0, Argon2_id,
|
||||
ARGON2_VERSION_NUMBER);
|
||||
hash, hashlen, NULL, 0, Argon2_id, version );
|
||||
}
|
||||
|
||||
static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) {
|
||||
@@ -443,10 +448,11 @@ const char *argon2_error_message(int error_code) {
|
||||
return "Unknown error code";
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism,
|
||||
uint32_t saltlen, uint32_t hashlen, argon2_type type) {
|
||||
return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) +
|
||||
numlen(t_cost) + numlen(m_cost) + numlen(parallelism) +
|
||||
b64len(saltlen) + b64len(hashlen) + numlen(ARGON2_VERSION_NUMBER) + 1;
|
||||
}
|
||||
*/
|
||||
|
@@ -225,11 +225,8 @@ typedef enum Argon2_type {
|
||||
} argon2_type;
|
||||
|
||||
/* Version of the algorithm */
|
||||
typedef enum Argon2_version {
|
||||
ARGON2_VERSION_10 = 0x10,
|
||||
ARGON2_VERSION_13 = 0x13,
|
||||
ARGON2_VERSION_NUMBER = ARGON2_VERSION_10
|
||||
} argon2_version;
|
||||
#define ARGON2_VERSION_10 0x10
|
||||
#define ARGON2_VERSION_13 0x13
|
||||
|
||||
/*
|
||||
* Function that gives the string representation of an argon2_type.
|
||||
@@ -267,7 +264,8 @@ ARGON2_PUBLIC int argon2i_hash_encoded(const uint32_t t_cost,
|
||||
const void *pwd, const size_t pwdlen,
|
||||
const void *salt, const size_t saltlen,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen);
|
||||
const size_t encodedlen,
|
||||
const uint32_t version );
|
||||
|
||||
/**
|
||||
* Hashes a password with Argon2i, producing a raw hash at @hash
|
||||
@@ -287,7 +285,8 @@ ARGON2_PUBLIC int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen);
|
||||
const size_t hashlen,
|
||||
const uint32_t version );
|
||||
|
||||
ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
@@ -295,13 +294,15 @@ ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost,
|
||||
const void *pwd, const size_t pwdlen,
|
||||
const void *salt, const size_t saltlen,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen);
|
||||
const size_t encodedlen,
|
||||
const uint32_t version );
|
||||
|
||||
ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen);
|
||||
const size_t hashlen,
|
||||
const uint32_t version );
|
||||
|
||||
ARGON2_PUBLIC int argon2id_hash_encoded(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
@@ -309,14 +310,16 @@ ARGON2_PUBLIC int argon2id_hash_encoded(const uint32_t t_cost,
|
||||
const void *pwd, const size_t pwdlen,
|
||||
const void *salt, const size_t saltlen,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen);
|
||||
const size_t encodedlen,
|
||||
const uint32_t version );
|
||||
|
||||
ARGON2_PUBLIC int argon2id_hash_raw(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen);
|
||||
const size_t hashlen,
|
||||
const uint32_t version );
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
@@ -325,7 +328,7 @@ ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen, argon2_type type,
|
||||
const uint32_t version);
|
||||
const uint32_t version );
|
||||
|
||||
/**
|
||||
* Verifies a password against an encoded string
|
||||
|
@@ -17,7 +17,7 @@
|
||||
|
||||
#if !defined(ARGON2_NO_THREADS)
|
||||
|
||||
#include "thread.h"
|
||||
#include "argon2d_thread.h"
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#endif
|
@@ -30,7 +30,7 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "core.h"
|
||||
#include "thread.h"
|
||||
#include "argon2d_thread.h"
|
||||
#include "../blake2/blake2.h"
|
||||
#include "../blake2/blake2-impl.h"
|
||||
|
||||
@@ -112,7 +112,7 @@ int allocate_memory(const argon2_context *context, uint8_t **memory,
|
||||
void free_memory(const argon2_context *context, uint8_t *memory,
|
||||
size_t num, size_t size) {
|
||||
size_t memory_size = num*size;
|
||||
clear_internal_memory(memory, memory_size);
|
||||
// clear_internal_memory(memory, memory_size);
|
||||
if (context->free_cbk) {
|
||||
(context->free_cbk)(memory, memory_size);
|
||||
} else {
|
||||
@@ -137,7 +137,7 @@ void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) {
|
||||
int FLAG_clear_internal_memory = 0;
|
||||
void clear_internal_memory(void *v, size_t n) {
|
||||
if (FLAG_clear_internal_memory && v) {
|
||||
secure_wipe_memory(v, n);
|
||||
// secure_wipe_memory(v, n);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -544,7 +544,8 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
store32(&value, context->t_cost);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, ARGON2_VERSION_NUMBER);
|
||||
// store32(&value, ARGON2_VERSION_NUMBER);
|
||||
store32(&value, context->version);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, (uint32_t)type);
|
||||
@@ -558,7 +559,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
context->pwdlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
|
||||
secure_wipe_memory(context->pwd, context->pwdlen);
|
||||
// secure_wipe_memory(context->pwd, context->pwdlen);
|
||||
context->pwdlen = 0;
|
||||
}
|
||||
}
|
||||
@@ -579,7 +580,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
context->secretlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
|
||||
secure_wipe_memory(context->secret, context->secretlen);
|
||||
// secure_wipe_memory(context->secret, context->secretlen);
|
||||
context->secretlen = 0;
|
||||
}
|
||||
}
|
||||
|
@@ -345,15 +345,15 @@ void fill_segment(const argon2_instance_t *instance,
|
||||
ref_block =
|
||||
instance->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
// if (ARGON2_VERSION_10 == instance->version) {
|
||||
// /* version 1.2.1 and earlier: overwrite, not XOR */
|
||||
// fill_block(state, ref_block, curr_block, 0);
|
||||
// } else {
|
||||
// if(0 == position.pass) {
|
||||
if (ARGON2_VERSION_10 == instance->version) {
|
||||
/* version 1.2.1 and earlier: overwrite, not XOR */
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
} else {
|
||||
if(0 == position.pass) {
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
// } else {
|
||||
// fill_block(state, ref_block, curr_block, 1);
|
||||
// }
|
||||
// }
|
||||
} else {
|
||||
fill_block(state, ref_block, curr_block, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -15,11 +15,11 @@ void blakehash_4way(void *state, const void *input)
|
||||
memcpy( &ctx, &blake_4w_ctx, sizeof ctx );
|
||||
blake256r14_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r14_4way_close( &ctx, vhash );
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
@@ -27,43 +27,34 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake256r14_4way_init( &blake_4w_ctx );
|
||||
blake256r14_4way( &blake_4w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
blakehash_4way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= HTarget )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -79,13 +70,13 @@ void blakehash_8way( void *state, const void *input )
|
||||
memcpy( &ctx, &blake_8w_ctx, sizeof ctx );
|
||||
blake256r14_8way( &ctx, input + (64<<3), 16 );
|
||||
blake256r14_8way_close( &ctx, vhash );
|
||||
mm256_deinterleave_8x32( state, state+ 32, state+ 64, state+ 96,
|
||||
state+128, state+160, state+192, state+224,
|
||||
vhash, 256 );
|
||||
_dintrlv_8x32( state, state+ 32, state+ 64, state+ 96,
|
||||
state+128, state+160, state+192, state+224,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
@@ -93,33 +84,21 @@ int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
|
||||
blake256r14_8way_init( &blake_8w_ctx );
|
||||
blake256r14_8way( &blake_8w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
|
||||
blakehash_8way( hash, vdata );
|
||||
@@ -128,17 +107,14 @@ int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( (hash+i)[7] <= HTarget && fulltest( hash+i, ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
num_found++;
|
||||
nonces[i] = n+i;
|
||||
work_set_target_ratio( work, hash+1 );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -10,12 +10,12 @@
|
||||
|
||||
#if defined (BLAKE_4WAY)
|
||||
void blakehash_4way(void *state, const void *input);
|
||||
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void blakehash( void *state, const void *input );
|
||||
int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blake( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -37,7 +37,7 @@
|
||||
#ifndef __BLAKE_HASH_4WAY__
|
||||
#define __BLAKE_HASH_4WAY__ 1
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
//#ifdef __SSE4_2__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
@@ -45,7 +45,7 @@ extern "C"{
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#define SPH_SIZE_blake256 256
|
||||
|
||||
@@ -57,19 +57,22 @@ extern "C"{
|
||||
// Blake-256 4 way
|
||||
|
||||
typedef struct {
|
||||
__m128i buf[16] __attribute__ ((aligned (64)));
|
||||
__m128i H[8];
|
||||
__m128i S[4];
|
||||
unsigned char buf[64<<2];
|
||||
uint32_t H[8<<2];
|
||||
uint32_t S[4<<2];
|
||||
// __m128i buf[16] __attribute__ ((aligned (64)));
|
||||
// __m128i H[8];
|
||||
// __m128i S[4];
|
||||
size_t ptr;
|
||||
sph_u32 T0, T1;
|
||||
uint32_t T0, T1;
|
||||
int rounds; // 14 for blake, 8 for blakecoin & vanilla
|
||||
} blake_4way_small_context;
|
||||
} blake_4way_small_context __attribute__ ((aligned (64)));
|
||||
|
||||
// Default 14 rounds
|
||||
typedef blake_4way_small_context blake256_4way_context;
|
||||
void blake256_4way_init(void *cc);
|
||||
void blake256_4way(void *cc, const void *data, size_t len);
|
||||
void blake256_4way_close(void *cc, void *dst);
|
||||
void blake256_4way_init(void *ctx);
|
||||
void blake256_4way(void *ctx, const void *data, size_t len);
|
||||
void blake256_4way_close(void *ctx, void *dst);
|
||||
|
||||
// 14 rounds, blake, decred
|
||||
typedef blake_4way_small_context blake256r14_4way_context;
|
||||
@@ -132,12 +135,10 @@ void blake512_4way_close(void *cc, void *dst);
|
||||
void blake512_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
#endif // AVX2
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif // BLAKE_HASH_4WAY_H__
|
||||
|
@@ -39,8 +39,8 @@ void blakehash(void *state, const void *input)
|
||||
|
||||
}
|
||||
|
||||
int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -49,6 +49,7 @@ int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t _ALIGN(32) endiandata[20];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
ctx_midstate_done = false;
|
||||
|
||||
|
1062
algo/blake/blake256-hash-4way.c
Normal file
1062
algo/blake/blake256-hash-4way.c
Normal file
File diff suppressed because it is too large
Load Diff
322
algo/blake/blake256-hash-4way.c.new
Normal file
322
algo/blake/blake256-hash-4way.c.new
Normal file
@@ -0,0 +1,322 @@
|
||||
// convert blake256 32 bit to use 64 bit with serial vectoring
|
||||
//
|
||||
// cut calls to GS in half
|
||||
//
|
||||
// combine V
|
||||
// v0 = {V0,V1}
|
||||
// v1 = {V2,V3}
|
||||
// v2 = {V4,V5}
|
||||
// v3 = {V6,V7}
|
||||
// v4 = {V8,V9}
|
||||
// v5 = {VA,VB}
|
||||
// v6 = {VC,VD}
|
||||
// v7 = {CE,VF}
|
||||
//
|
||||
// v6x = {VD,VC} swap(VC,VD) swap(v6)
|
||||
// v7x = {VF,VE} swap(VE,VF) swap(v7)
|
||||
//
|
||||
// V0 = v1v0
|
||||
// V1 = v3v2
|
||||
// V2 = v5v4
|
||||
// V3 = v7v6
|
||||
// V4 = v9v8
|
||||
// V5 = vbva
|
||||
// V6 = vdvc
|
||||
// V7 = vfve
|
||||
//
|
||||
// The rotate in ROUND is to effect straddle and unstraddle for the third
|
||||
// and 4th iteration of GS.
|
||||
// It concatenates 2 contiguous 256 bit vectors and extracts the middle
|
||||
// 256 bits. After the transform they must be restored with only the
|
||||
// chosen bits modified in the original 2 vectors.
|
||||
// ror1x128 achieves this by putting the chosen bits in arg1, the "low"
|
||||
// 256 bit vector and saves the untouched bits temporailly in arg0, the
|
||||
// "high" 256 bit vector. Simply reverse the process to restore data back
|
||||
// to original positions.
|
||||
|
||||
// Use standard 4way when AVX2 is not available use x2 mode with AVX2.
|
||||
//
|
||||
// Data is organised the same as 32 bit 4 way, in effect serial vectoring
|
||||
// on top of parallel vectoring. Same data in the same place just taking
|
||||
// two chunks at a time.
|
||||
//
|
||||
// Transparent to user, x2 mode used when AVX2 detected.
|
||||
// Use existing 4way context but revert to scalar types.
|
||||
// Same interleave function (128 bit) or x2 with 256 bit?
|
||||
// User trsnaparency would have to apply to interleave as well.
|
||||
//
|
||||
// Use common 4way update and close
|
||||
|
||||
/*
|
||||
typedef struct {
|
||||
unsigned char buf[64<<2];
|
||||
uint32_t H[8<<2];
|
||||
uint32_t S[4<<2];
|
||||
size_t ptr;
|
||||
uint32_t T0, T1;
|
||||
int rounds; // 14 for blake, 8 for blakecoin & vanilla
|
||||
} blakex2_4way_small_context __attribute__ ((aligned (64)));
|
||||
*/
|
||||
|
||||
static void
|
||||
blake32x2_4way_init( blake_4way_small_context *ctx, const uint32_t *iv,
|
||||
const uint32_t *salt, int rounds )
|
||||
{
|
||||
casti_m128i( ctx->H, 0 ) = _mm_set1_epi32( iv[0] );
|
||||
casti_m128i( ctx->H, 1 ) = _mm_set1_epi32( iv[1] );
|
||||
casti_m128i( ctx->H, 2 ) = _mm_set1_epi32( iv[2] );
|
||||
casti_m128i( ctx->H, 3 ) = _mm_set1_epi32( iv[3] );
|
||||
casti_m128i( ctx->H, 4 ) = _mm_set1_epi32( iv[4] );
|
||||
casti_m128i( ctx->H, 5 ) = _mm_set1_epi32( iv[5] );
|
||||
casti_m128i( ctx->H, 6 ) = _mm_set1_epi32( iv[6] );
|
||||
casti_m128i( ctx->H, 7 ) = _mm_set1_epi32( iv[7] );
|
||||
|
||||
casti_m128i( ctx->S, 0 ) = m128_zero;
|
||||
casti_m128i( ctx->S, 1 ) = m128_zero;
|
||||
casti_m128i( ctx->S, 2 ) = m128_zero;
|
||||
casti_m128i( ctx->S, 3 ) = m128_zero;
|
||||
/*
|
||||
sc->S[0] = _mm_set1_epi32( salt[0] );
|
||||
sc->S[1] = _mm_set1_epi32( salt[1] );
|
||||
sc->S[2] = _mm_set1_epi32( salt[2] );
|
||||
sc->S[3] = _mm_set1_epi32( salt[3] );
|
||||
*/
|
||||
ctx->T0 = ctx->T1 = 0;
|
||||
ctx->ptr = 0;
|
||||
ctx->rounds = rounds;
|
||||
}
|
||||
|
||||
static void
|
||||
blake32x2( blake_4way_small_context *ctx, const void *data, size_t len )
|
||||
{
|
||||
__m128i *buf = (__m256i*)ctx->buf;
|
||||
size_t bptr = ctx->ptr << 2;
|
||||
size_t vptr = ctx->ptr >> 3;
|
||||
size_t blen = len << 2;
|
||||
// unsigned char *buf = ctx->buf;
|
||||
// size_t ptr = ctx->ptr<<4; // repurposed
|
||||
DECL_STATE32x2
|
||||
|
||||
// buf = sc->buf;
|
||||
// ptr = sc->ptr;
|
||||
|
||||
// adjust len for use with ptr, clen, all absolute bytes.
|
||||
// int blen = len<<2;
|
||||
|
||||
if ( blen < (sizeof ctx->buf) - bptr )
|
||||
{
|
||||
memcpy( buf + vptr, data, blen );
|
||||
ptr += blen;
|
||||
ctx->ptr = bptr >> 2;;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE32( ctx );
|
||||
while ( blen > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
|
||||
clen = ( sizeof sc->buf ) - ptr;
|
||||
if ( clen > blen )
|
||||
clen = blen;
|
||||
memcpy( buf + vptr, data, clen );
|
||||
bptr += clen;
|
||||
vptr = bptr >> 5;
|
||||
data = (const unsigned char *)data + clen;
|
||||
blen -= clen;
|
||||
if ( bptr == sizeof ctx->buf )
|
||||
{
|
||||
if ( ( T0 = T0 + 512 ) < 512 ) // not needed, will never rollover
|
||||
T1 += 1;
|
||||
COMPRESS32x2_4WAY( ctx->rounds );
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE32x2( ctx );
|
||||
ctx->ptr = bptr >> 2;
|
||||
}
|
||||
|
||||
static void
|
||||
blake32x2_4way_close( blake_4way_small_context *ctx, void *dst )
|
||||
{
|
||||
__m256i buf[8] __attribute__ ((aligned (64)));
|
||||
size_t ptr = ctx->ptr;
|
||||
size_t vptr = ctx->ptr>>2;
|
||||
unsigned bit_len = ( (unsigned)ptr << 3 ); // one lane
|
||||
uint32_t th = ctx->T1;
|
||||
uint32_t tl = ctx->T0 + bit_len;
|
||||
|
||||
if ( ptr == 0 )
|
||||
{
|
||||
ctx->T0 = 0xFFFFFE00UL;
|
||||
ctx->T1 = 0xFFFFFFFFUL;
|
||||
}
|
||||
else if ( ctx->T0 == 0 )
|
||||
{
|
||||
ctx->T0 = 0xFFFFFE00UL + bit_len;
|
||||
ctx->T1 -= 1;
|
||||
}
|
||||
else
|
||||
ctx->T0 -= 512 - bit_len;
|
||||
|
||||
// memset doesn't do ints
|
||||
buf[ vptr ] = _mm256_set_epi32( 0,0,0,0, 0x80, 0x80, 0x80, 0x80 );
|
||||
|
||||
if ( vptr < 5 )
|
||||
{
|
||||
memset_zero_256( buf + vptr + 1, 6 - vptr );
|
||||
buf[ 6 ] = _mm256_or_si256( vbuf[ 6 ], _mm256_set_epi32(
|
||||
0x01000000UL,0x01000000UL,0x01000000UL,0x01000000UL, 0,0,0,0 ) );
|
||||
buf[ 7 ] = mm256_bswap_32( _mm256_set_epi32( tl,tl,tl,tl,
|
||||
th,th,th,th ) );
|
||||
blake32x2_4way( ctx, buf + vptr, 64 - ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_256( vbuf + vptr + 1, 7 - vptr );
|
||||
blake32x2_4way( ctx, vbuf + ptr, 64 - ptr );
|
||||
ctx->T0 = 0xFFFFFE00UL;
|
||||
ctx->T1 = 0xFFFFFFFFUL;
|
||||
buf[ 6 ] = mm256_zero;
|
||||
buf[ 6 ] = _mm256_set_epi32( 0,0,0,0,
|
||||
0x01000000UL,0x01000000UL,0x01000000UL,0x01000000UL );
|
||||
buf[ 7 ] = mm256_bswap_32( _mm256_set_epi32( tl, tl, tl, tl,
|
||||
th, th, th, th );
|
||||
blake32x2_4way( ctx, buf, 64 );
|
||||
}
|
||||
|
||||
casti_m256i( dst, 0 ) = mm256_bswap_32( casti_m256i( ctx->H, 0 ) );
|
||||
casti_m256i( dst, 1 ) = mm256_bswap_32( casti_m256i( ctx->H, 1 ) );
|
||||
casti_m256i( dst, 2 ) = mm256_bswap_32( casti_m256i( ctx->H, 2 ) );
|
||||
casti_m256i( dst, 3 ) = mm256_bswap_32( casti_m256i( ctx->H, 3 ) );
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#define DECL_STATE32x2_4WAY \
|
||||
__m256i H0, H1, H2, H3; \
|
||||
__m256i S0, S1; \
|
||||
uint32_t T0, T1;
|
||||
|
||||
#define READ_STATE32x2_4WAY(state) do \
|
||||
{ \
|
||||
H0 = casti_m256i( state->H, 0 ); \
|
||||
H1 = casti_m256i( state->H, 1 ); \
|
||||
H2 = casti_m256i( state->H, 2 ); \
|
||||
H3 = casti_m256i( state->H, 3 ); \
|
||||
S0 = casti_m256i( state->S, 0 ); \
|
||||
S1 = casti_m256i( state->S, 1 ); \
|
||||
T0 = state->T0; \
|
||||
T1 = state->T1; \
|
||||
|
||||
#define WRITE_STATE32x2_4WAY(state) do { \
|
||||
casti_m256i( state->H, 0 ) = H0; \
|
||||
casti_m256i( state->H, 1 ) = H1; \
|
||||
casti_m256i( state->H, 2 ) = H2; \
|
||||
casti_m256i( state->H, 3 ) = H3; \
|
||||
casti_m256i( state->S, 0 ) = S0; \
|
||||
casti_m256i( state->S, 1 ) = S1; \
|
||||
state->T0 = T0; \
|
||||
state->T1 = T1; \
|
||||
} while (0)
|
||||
|
||||
|
||||
#define GSx2_4WAY( m0m2, m1m3, c0c2, c1c3, a, b, c, d ) do \
|
||||
{ \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
|
||||
_mm256_set_epi32( c1,c3, c1,c3, c1,c3, c1,c3 ), \
|
||||
_mm256_set_epi32( m0,m2, m0,m2, m0,m2, m0,m2 ) ), b ), a ); \
|
||||
d = mm256_ror_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( _mm256_xor_si256( \
|
||||
_mm256_set_epi32( c0,c2, c0,c2, c0,c2, c0,c2 ), \
|
||||
_mm256_set_epi32( m1,m3, m1,m3, m1,m3, m1,m3 ) ), b ), a ); \
|
||||
d = mm256_ror_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_ror_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_Sx2_4WAY(r) do \
|
||||
{ \
|
||||
GS2_4WAY( Mx(r, 0), Mx(r, 1), Mx(r, 2), Mx(r, 3), \
|
||||
CSx(r, 0), CSx(r, 1), CSx(r, 2), CSx(r, 3), V0, V2, V4, V6 ); \
|
||||
GS2_4WAY( Mx(r, 4), Mx(r, 5), Mx(r, 6), Mx(r, 7), \
|
||||
CSx(r, 4), CSx(r, 5), CSx(r, 6), CSx(r, 7), V1, V3, V5, V7 ); \
|
||||
mm256_ror1x128_512( V3, V2 ); \
|
||||
mm256_ror1x128_512( V6, V7 ); \
|
||||
GS2_4WAY( Mx(r, 8), Mx(r, 9), Mx(r, A), Mx(r, B), \
|
||||
CSx(r, 8), CSx(r, 9), CSx(r, A), CSx(r, B), V0, V2, V5, V7 ); \
|
||||
GS2_4WAY( Mx(r, C), Mx(r, D), Mx(r, C), Mx(r, D), \
|
||||
CSx(r, C), CSx(r, D), CSx(r, C), CSx(r, D), V1, V3, V4, V6 ); \
|
||||
mm256_rol1x128_512( V2, V3 ); \
|
||||
mm256_rol1x128_512( V7, V6 );
|
||||
|
||||
#define COMPRESS32x2_4WAY( rounds ) do \
|
||||
{ \
|
||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
unsigned r; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
V3 = H3; \
|
||||
V4 = _mm256_xor_si256( S0, _mm256_set_epi32( CS1, CS1, CS1, CS1, \
|
||||
CS0, CS0, CS0, CS0 ) ); \
|
||||
V5 = _mm256_xor_si256( S1, _mm256_set_epi32( CS3, CS3, CS3, CS3, \
|
||||
CS2, CS2, CS2, CS2 ) ); \
|
||||
V6 = _mm256_xor_si256( _mm256_set1_epi32( T0 ), \
|
||||
_mm256_set_epi32( CS5, CS5, CS5, CS5, \
|
||||
CS4, CS4, CS4, CS4 ) ); \
|
||||
V7 = _mm256_xor_si256( _mm256_set1_epi32( T1 ), \
|
||||
_mm256_set_epi32( CS7, CS7, CS7, CS7, \
|
||||
CS6, CS6, CS6, CS6 ) ); \
|
||||
M0 = mm256_bswap_32( buf[ 0] ); \
|
||||
M1 = mm256_bswap_32( buf[ 1] ); \
|
||||
M2 = mm256_bswap_32( buf[ 2] ); \
|
||||
M3 = mm256_bswap_32( buf[ 3] ); \
|
||||
M4 = mm256_bswap_32( buf[ 4] ); \
|
||||
M5 = mm256_bswap_32( buf[ 5] ); \
|
||||
M6 = mm256_bswap_32( buf[ 6] ); \
|
||||
M7 = mm256_bswap_32( buf[ 7] ); \
|
||||
ROUND_Sx2_4WAY(0); \
|
||||
ROUND_Sx2_4WAY(1); \
|
||||
ROUND_Sx2_4WAY(2); \
|
||||
ROUND_Sx2_4WAY(3); \
|
||||
ROUND_Sx2_4WAY(4); \
|
||||
ROUND_Sx2_4WAY(5); \
|
||||
ROUND_Sx2_4WAY(6); \
|
||||
ROUND_Sx2_4WAY(7); \
|
||||
if (rounds == 14) \
|
||||
{ \
|
||||
ROUND_Sx2_4WAY(8); \
|
||||
ROUND_Sx2_4WAY(9); \
|
||||
ROUND_Sx2_4WAY(0); \
|
||||
ROUND_Sx2_4WAY(1); \
|
||||
ROUND_Sx2_4WAY(2); \
|
||||
ROUND_Sx2_4WAY(3); \
|
||||
} \
|
||||
H0 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( V8, V0 ), S0 ), H0 ); \
|
||||
H1 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( V9, V1 ), S1 ), H1 ); \
|
||||
H2 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( VA, V2 ), S2 ), H2 ); \
|
||||
H3 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( VB, V3 ), S3 ), H3 ); \
|
||||
} while (0)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@@ -35,13 +35,14 @@ static void blake2b_hash_end(uint32_t *output, const uint32_t *input)
|
||||
}
|
||||
*/
|
||||
|
||||
int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake2b( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(A) vhashcpu[8];
|
||||
uint32_t _ALIGN(A) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[8];
|
||||
|
@@ -16,60 +16,49 @@ void blake2s_8way_hash( void *output, const void *input )
|
||||
blake2s_8way_update( &ctx, input + (64<<3), 16 );
|
||||
blake2s_8way_final( &ctx, vhash, BLAKE2S_OUTBYTES );
|
||||
|
||||
mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96,
|
||||
output+128, output+160, output+192, output+224,
|
||||
vhash, 256 );
|
||||
dintrlv_8x32( output, output+ 32, output+ 64, output+ 96,
|
||||
output+128, output+160, output+192, output+224,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
blake2s_8way_init( &blake2s_8w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_8way_update( &blake2s_8w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
|
||||
blake2s_8way_hash( hash, vdata );
|
||||
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= Htarg )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined(BLAKE2S_4WAY)
|
||||
@@ -85,52 +74,46 @@ void blake2s_4way_hash( void *output, const void *input )
|
||||
blake2s_4way_update( &ctx, input + (64<<2), 16 );
|
||||
blake2s_4way_final( &ctx, vhash, BLAKE2S_OUTBYTES );
|
||||
|
||||
mm_deinterleave_4x32( output, output+32, output+64, output+96, vhash, 256 );
|
||||
dintrlv_4x32( output, output+32, output+64, output+96,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake2s_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake2s_4way_init( &blake2s_4w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_4way_update( &blake2s_4w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
|
||||
blake2s_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= Htarg )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -16,19 +16,19 @@ bool register_blake2s_algo( algo_gate_t* gate );
|
||||
#if defined(BLAKE2S_8WAY)
|
||||
|
||||
void blake2s_8way_hash( void *state, const void *input );
|
||||
int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined (BLAKE2S_4WAY)
|
||||
|
||||
void blake2s_4way_hash( void *state, const void *input );
|
||||
int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blake2s_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#else
|
||||
|
||||
void blake2s_hash( void *state, const void *input );
|
||||
int scanhash_blake2s( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blake2s( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -92,13 +92,13 @@ int blake2s_4way_compress( blake2s_4way_state *S, const __m128i* block )
|
||||
#define G4W(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+0] ] ); \
|
||||
d = mm_ror_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
d = mm128_ror_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_ror_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
b = mm128_ror_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+1] ] ); \
|
||||
d = mm_ror_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
d = mm128_ror_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_ror_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
b = mm128_ror_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND4W(r) \
|
||||
|
@@ -16,7 +16,7 @@
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
@@ -32,14 +32,15 @@ static void blake2s_hash_end(uint32_t *output, const uint32_t *input)
|
||||
blake2s_final(&s_ctx, (uint8_t*) output, BLAKE2S_OUTBYTES);
|
||||
}
|
||||
*/
|
||||
int scanhash_blake2s(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_blake2s( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) hash64[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
701
algo/blake/blake512-hash-4way.c
Normal file
701
algo/blake/blake512-hash-4way.c
Normal file
@@ -0,0 +1,701 @@
|
||||
/* $Id: blake.c 252 2011-06-07 17:55:14Z tp $ */
|
||||
/*
|
||||
* BLAKE implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "blake-hash-4way.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BLAKE
|
||||
#define SPH_SMALL_FOOTPRINT_BLAKE 1
|
||||
#endif
|
||||
|
||||
#if SPH_64 && (SPH_SMALL_FOOTPRINT_BLAKE || !SPH_64_TRUE)
|
||||
#define SPH_COMPACT_BLAKE_64 1
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
|
||||
// Blake-512
|
||||
|
||||
static const sph_u64 IV512[8] = {
|
||||
SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
|
||||
SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
|
||||
SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
|
||||
SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
|
||||
};
|
||||
|
||||
|
||||
#if SPH_COMPACT_BLAKE_32 || SPH_COMPACT_BLAKE_64
|
||||
|
||||
// Blake-256 4 & 8 way, Blake-512 4 way
|
||||
|
||||
static const unsigned sigma[16][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#define Z00 0
|
||||
#define Z01 1
|
||||
#define Z02 2
|
||||
#define Z03 3
|
||||
#define Z04 4
|
||||
#define Z05 5
|
||||
#define Z06 6
|
||||
#define Z07 7
|
||||
#define Z08 8
|
||||
#define Z09 9
|
||||
#define Z0A A
|
||||
#define Z0B B
|
||||
#define Z0C C
|
||||
#define Z0D D
|
||||
#define Z0E E
|
||||
#define Z0F F
|
||||
|
||||
#define Z10 E
|
||||
#define Z11 A
|
||||
#define Z12 4
|
||||
#define Z13 8
|
||||
#define Z14 9
|
||||
#define Z15 F
|
||||
#define Z16 D
|
||||
#define Z17 6
|
||||
#define Z18 1
|
||||
#define Z19 C
|
||||
#define Z1A 0
|
||||
#define Z1B 2
|
||||
#define Z1C B
|
||||
#define Z1D 7
|
||||
#define Z1E 5
|
||||
#define Z1F 3
|
||||
|
||||
#define Z20 B
|
||||
#define Z21 8
|
||||
#define Z22 C
|
||||
#define Z23 0
|
||||
#define Z24 5
|
||||
#define Z25 2
|
||||
#define Z26 F
|
||||
#define Z27 D
|
||||
#define Z28 A
|
||||
#define Z29 E
|
||||
#define Z2A 3
|
||||
#define Z2B 6
|
||||
#define Z2C 7
|
||||
#define Z2D 1
|
||||
#define Z2E 9
|
||||
#define Z2F 4
|
||||
|
||||
#define Z30 7
|
||||
#define Z31 9
|
||||
#define Z32 3
|
||||
#define Z33 1
|
||||
#define Z34 D
|
||||
#define Z35 C
|
||||
#define Z36 B
|
||||
#define Z37 E
|
||||
#define Z38 2
|
||||
#define Z39 6
|
||||
#define Z3A 5
|
||||
#define Z3B A
|
||||
#define Z3C 4
|
||||
#define Z3D 0
|
||||
#define Z3E F
|
||||
#define Z3F 8
|
||||
|
||||
#define Z40 9
|
||||
#define Z41 0
|
||||
#define Z42 5
|
||||
#define Z43 7
|
||||
#define Z44 2
|
||||
#define Z45 4
|
||||
#define Z46 A
|
||||
#define Z47 F
|
||||
#define Z48 E
|
||||
#define Z49 1
|
||||
#define Z4A B
|
||||
#define Z4B C
|
||||
#define Z4C 6
|
||||
#define Z4D 8
|
||||
#define Z4E 3
|
||||
#define Z4F D
|
||||
|
||||
#define Z50 2
|
||||
#define Z51 C
|
||||
#define Z52 6
|
||||
#define Z53 A
|
||||
#define Z54 0
|
||||
#define Z55 B
|
||||
#define Z56 8
|
||||
#define Z57 3
|
||||
#define Z58 4
|
||||
#define Z59 D
|
||||
#define Z5A 7
|
||||
#define Z5B 5
|
||||
#define Z5C F
|
||||
#define Z5D E
|
||||
#define Z5E 1
|
||||
#define Z5F 9
|
||||
|
||||
#define Z60 C
|
||||
#define Z61 5
|
||||
#define Z62 1
|
||||
#define Z63 F
|
||||
#define Z64 E
|
||||
#define Z65 D
|
||||
#define Z66 4
|
||||
#define Z67 A
|
||||
#define Z68 0
|
||||
#define Z69 7
|
||||
#define Z6A 6
|
||||
#define Z6B 3
|
||||
#define Z6C 9
|
||||
#define Z6D 2
|
||||
#define Z6E 8
|
||||
#define Z6F B
|
||||
|
||||
#define Z70 D
|
||||
#define Z71 B
|
||||
#define Z72 7
|
||||
#define Z73 E
|
||||
#define Z74 C
|
||||
#define Z75 1
|
||||
#define Z76 3
|
||||
#define Z77 9
|
||||
#define Z78 5
|
||||
#define Z79 0
|
||||
#define Z7A F
|
||||
#define Z7B 4
|
||||
#define Z7C 8
|
||||
#define Z7D 6
|
||||
#define Z7E 2
|
||||
#define Z7F A
|
||||
|
||||
#define Z80 6
|
||||
#define Z81 F
|
||||
#define Z82 E
|
||||
#define Z83 9
|
||||
#define Z84 B
|
||||
#define Z85 3
|
||||
#define Z86 0
|
||||
#define Z87 8
|
||||
#define Z88 C
|
||||
#define Z89 2
|
||||
#define Z8A D
|
||||
#define Z8B 7
|
||||
#define Z8C 1
|
||||
#define Z8D 4
|
||||
#define Z8E A
|
||||
#define Z8F 5
|
||||
|
||||
#define Z90 A
|
||||
#define Z91 2
|
||||
#define Z92 8
|
||||
#define Z93 4
|
||||
#define Z94 7
|
||||
#define Z95 6
|
||||
#define Z96 1
|
||||
#define Z97 5
|
||||
#define Z98 F
|
||||
#define Z99 B
|
||||
#define Z9A 9
|
||||
#define Z9B E
|
||||
#define Z9C 3
|
||||
#define Z9D C
|
||||
#define Z9E D
|
||||
#define Z9F 0
|
||||
|
||||
#define Mx(r, i) Mx_(Z ## r ## i)
|
||||
#define Mx_(n) Mx__(n)
|
||||
#define Mx__(n) M ## n
|
||||
|
||||
// Blake-512 4 way
|
||||
|
||||
#define CBx(r, i) CBx_(Z ## r ## i)
|
||||
#define CBx_(n) CBx__(n)
|
||||
#define CBx__(n) CB ## n
|
||||
|
||||
#define CB0 SPH_C64(0x243F6A8885A308D3)
|
||||
#define CB1 SPH_C64(0x13198A2E03707344)
|
||||
#define CB2 SPH_C64(0xA4093822299F31D0)
|
||||
#define CB3 SPH_C64(0x082EFA98EC4E6C89)
|
||||
#define CB4 SPH_C64(0x452821E638D01377)
|
||||
#define CB5 SPH_C64(0xBE5466CF34E90C6C)
|
||||
#define CB6 SPH_C64(0xC0AC29B7C97C50DD)
|
||||
#define CB7 SPH_C64(0x3F84D5B5B5470917)
|
||||
#define CB8 SPH_C64(0x9216D5D98979FB1B)
|
||||
#define CB9 SPH_C64(0xD1310BA698DFB5AC)
|
||||
#define CBA SPH_C64(0x2FFD72DBD01ADFB7)
|
||||
#define CBB SPH_C64(0xB8E1AFED6A267E96)
|
||||
#define CBC SPH_C64(0xBA7C9045F12C7F99)
|
||||
#define CBD SPH_C64(0x24A19947B3916CF7)
|
||||
#define CBE SPH_C64(0x0801F2E2858EFC16)
|
||||
#define CBF SPH_C64(0x636920D871574E69)
|
||||
|
||||
#if SPH_COMPACT_BLAKE_64
|
||||
// not used
|
||||
static const sph_u64 CB[16] = {
|
||||
SPH_C64(0x243F6A8885A308D3), SPH_C64(0x13198A2E03707344),
|
||||
SPH_C64(0xA4093822299F31D0), SPH_C64(0x082EFA98EC4E6C89),
|
||||
SPH_C64(0x452821E638D01377), SPH_C64(0xBE5466CF34E90C6C),
|
||||
SPH_C64(0xC0AC29B7C97C50DD), SPH_C64(0x3F84D5B5B5470917),
|
||||
SPH_C64(0x9216D5D98979FB1B), SPH_C64(0xD1310BA698DFB5AC),
|
||||
SPH_C64(0x2FFD72DBD01ADFB7), SPH_C64(0xB8E1AFED6A267E96),
|
||||
SPH_C64(0xBA7C9045F12C7F99), SPH_C64(0x24A19947B3916CF7),
|
||||
SPH_C64(0x0801F2E2858EFC16), SPH_C64(0x636920D871574E69)
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Blake-512 4 way
|
||||
|
||||
#define GB_4WAY(m0, m1, c0, c1, a, b, c, d) do { \
|
||||
a = _mm256_add_epi64( _mm256_add_epi64( _mm256_xor_si256( \
|
||||
_mm256_set_epi64x( c1, c1, c1, c1 ), m0 ), b ), a ); \
|
||||
d = mm256_ror_64( _mm256_xor_si256( d, a ), 32 ); \
|
||||
c = _mm256_add_epi64( c, d ); \
|
||||
b = mm256_ror_64( _mm256_xor_si256( b, c ), 25 ); \
|
||||
a = _mm256_add_epi64( _mm256_add_epi64( _mm256_xor_si256( \
|
||||
_mm256_set_epi64x( c0, c0, c0, c0 ), m1 ), b ), a ); \
|
||||
d = mm256_ror_64( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi64( c, d ); \
|
||||
b = mm256_ror_64( _mm256_xor_si256( b, c ), 11 ); \
|
||||
} while (0)
|
||||
|
||||
#if SPH_COMPACT_BLAKE_64
|
||||
// not used
|
||||
#define ROUND_B_4WAY(r) do { \
|
||||
GB_4WAY(M[sigma[r][0x0]], M[sigma[r][0x1]], \
|
||||
CB[sigma[r][0x0]], CB[sigma[r][0x1]], V0, V4, V8, VC); \
|
||||
GB_4WAY(M[sigma[r][0x2]], M[sigma[r][0x3]], \
|
||||
CB[sigma[r][0x2]], CB[sigma[r][0x3]], V1, V5, V9, VD); \
|
||||
GB_4WAY(M[sigma[r][0x4]], M[sigma[r][0x5]], \
|
||||
CB[sigma[r][0x4]], CB[sigma[r][0x5]], V2, V6, VA, VE); \
|
||||
GB_4WAY(M[sigma[r][0x6]], M[sigma[r][0x7]], \
|
||||
CB[sigma[r][0x6]], CB[sigma[r][0x7]], V3, V7, VB, VF); \
|
||||
GB_4WAY(M[sigma[r][0x8]], M[sigma[r][0x9]], \
|
||||
CB[sigma[r][0x8]], CB[sigma[r][0x9]], V0, V5, VA, VF); \
|
||||
GB_4WAY(M[sigma[r][0xA]], M[sigma[r][0xB]], \
|
||||
CB[sigma[r][0xA]], CB[sigma[r][0xB]], V1, V6, VB, VC); \
|
||||
GB_4WAY(M[sigma[r][0xC]], M[sigma[r][0xD]], \
|
||||
CB[sigma[r][0xC]], CB[sigma[r][0xD]], V2, V7, V8, VD); \
|
||||
GB_4WAY(M[sigma[r][0xE]], M[sigma[r][0xF]], \
|
||||
CB[sigma[r][0xE]], CB[sigma[r][0xF]], V3, V4, V9, VE); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
//current_impl
|
||||
#define ROUND_B_4WAY(r) do { \
|
||||
GB_4WAY(Mx(r, 0), Mx(r, 1), CBx(r, 0), CBx(r, 1), V0, V4, V8, VC); \
|
||||
GB_4WAY(Mx(r, 2), Mx(r, 3), CBx(r, 2), CBx(r, 3), V1, V5, V9, VD); \
|
||||
GB_4WAY(Mx(r, 4), Mx(r, 5), CBx(r, 4), CBx(r, 5), V2, V6, VA, VE); \
|
||||
GB_4WAY(Mx(r, 6), Mx(r, 7), CBx(r, 6), CBx(r, 7), V3, V7, VB, VF); \
|
||||
GB_4WAY(Mx(r, 8), Mx(r, 9), CBx(r, 8), CBx(r, 9), V0, V5, VA, VF); \
|
||||
GB_4WAY(Mx(r, A), Mx(r, B), CBx(r, A), CBx(r, B), V1, V6, VB, VC); \
|
||||
GB_4WAY(Mx(r, C), Mx(r, D), CBx(r, C), CBx(r, D), V2, V7, V8, VD); \
|
||||
GB_4WAY(Mx(r, E), Mx(r, F), CBx(r, E), CBx(r, F), V3, V4, V9, VE); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Blake-512 4 way
|
||||
|
||||
#define DECL_STATE64_4WAY \
|
||||
__m256i H0, H1, H2, H3, H4, H5, H6, H7; \
|
||||
__m256i S0, S1, S2, S3; \
|
||||
sph_u64 T0, T1;
|
||||
|
||||
#define READ_STATE64_4WAY(state) do { \
|
||||
H0 = (state)->H[0]; \
|
||||
H1 = (state)->H[1]; \
|
||||
H2 = (state)->H[2]; \
|
||||
H3 = (state)->H[3]; \
|
||||
H4 = (state)->H[4]; \
|
||||
H5 = (state)->H[5]; \
|
||||
H6 = (state)->H[6]; \
|
||||
H7 = (state)->H[7]; \
|
||||
S0 = (state)->S[0]; \
|
||||
S1 = (state)->S[1]; \
|
||||
S2 = (state)->S[2]; \
|
||||
S3 = (state)->S[3]; \
|
||||
T0 = (state)->T0; \
|
||||
T1 = (state)->T1; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE64_4WAY(state) do { \
|
||||
(state)->H[0] = H0; \
|
||||
(state)->H[1] = H1; \
|
||||
(state)->H[2] = H2; \
|
||||
(state)->H[3] = H3; \
|
||||
(state)->H[4] = H4; \
|
||||
(state)->H[5] = H5; \
|
||||
(state)->H[6] = H6; \
|
||||
(state)->H[7] = H7; \
|
||||
(state)->S[0] = S0; \
|
||||
(state)->S[1] = S1; \
|
||||
(state)->S[2] = S2; \
|
||||
(state)->S[3] = S3; \
|
||||
(state)->T0 = T0; \
|
||||
(state)->T1 = T1; \
|
||||
} while (0)
|
||||
|
||||
#if SPH_COMPACT_BLAKE_64
|
||||
|
||||
// not used
|
||||
#define COMPRESS64_4WAY do { \
|
||||
__m256i M[16]; \
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
__m256i V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
unsigned r; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
V3 = H3; \
|
||||
V4 = H4; \
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm256_xor_si256( S0, _mm256_set_epi64x( CB0, CB0, CB0, CB0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, _mm256_set_epi64x( CB1, CB1, CB1, CB1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, _mm256_set_epi64x( CB2, CB2, CB2, CB2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, _mm256_set_epi64x( CB3, CB3, CB3, CB3 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
|
||||
_mm256_set_epi64x( CB4, CB4, CB4, CB4 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
|
||||
_mm256_set_epi64x( CB5, CB5, CB5, CB5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
|
||||
_mm256_set_epi64x( CB6, CB6, CB6, CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
|
||||
_mm256_set_epi64x( CB7, CB7, CB7, CB7 ) ); \
|
||||
M[0x0] = mm256_bswap_64( *(buf+0) ); \
|
||||
M[0x1] = mm256_bswap_64( *(buf+1) ); \
|
||||
M[0x2] = mm256_bswap_64( *(buf+2) ); \
|
||||
M[0x3] = mm256_bswap_64( *(buf+3) ); \
|
||||
M[0x4] = mm256_bswap_64( *(buf+4) ); \
|
||||
M[0x5] = mm256_bswap_64( *(buf+5) ); \
|
||||
M[0x6] = mm256_bswap_64( *(buf+6) ); \
|
||||
M[0x7] = mm256_bswap_64( *(buf+7) ); \
|
||||
M[0x8] = mm256_bswap_64( *(buf+8) ); \
|
||||
M[0x9] = mm256_bswap_64( *(buf+9) ); \
|
||||
M[0xA] = mm256_bswap_64( *(buf+10) ); \
|
||||
M[0xB] = mm256_bswap_64( *(buf+11) ); \
|
||||
M[0xC] = mm256_bswap_64( *(buf+12) ); \
|
||||
M[0xD] = mm256_bswap_64( *(buf+13) ); \
|
||||
M[0xE] = mm256_bswap_64( *(buf+14) ); \
|
||||
M[0xF] = mm256_bswap_64( *(buf+15) ); \
|
||||
for (r = 0; r < 16; r ++) \
|
||||
ROUND_B_4WAY(r); \
|
||||
H0 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S0, V0 ), V8 ), H0 ); \
|
||||
H1 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S1, V1 ), V9 ), H1 ); \
|
||||
H2 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S2, V2 ), VA ), H2 ); \
|
||||
H3 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S3, V3 ), VB ), H3 ); \
|
||||
H4 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S0, V4 ), VC ), H4 ); \
|
||||
H5 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S1, V5 ), VD ), H5 ); \
|
||||
H6 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S2, V6 ), VE ), H6 ); \
|
||||
H7 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S3, V7 ), VF ), H7 ); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
//current impl
|
||||
|
||||
#define COMPRESS64_4WAY do \
|
||||
{ \
|
||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
__m256i M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
__m256i V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
__m256i shuf_bswap64; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
V3 = H3; \
|
||||
V4 = H4; \
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm256_xor_si256( S0, _mm256_set1_epi64x( CB0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, _mm256_set1_epi64x( CB1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, _mm256_set1_epi64x( CB2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, _mm256_set1_epi64x( CB3 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set1_epi64x( T0 ), \
|
||||
_mm256_set1_epi64x( CB4 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set1_epi64x( T0 ), \
|
||||
_mm256_set1_epi64x( CB5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
|
||||
_mm256_set1_epi64x( CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
|
||||
_mm256_set1_epi64x( CB7 ) ); \
|
||||
shuf_bswap64 = _mm256_set_epi64x( 0x08090a0b0c0d0e0f, 0x0001020304050607, \
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ); \
|
||||
M0 = _mm256_shuffle_epi8( *(buf+ 0), shuf_bswap64 ); \
|
||||
M1 = _mm256_shuffle_epi8( *(buf+ 1), shuf_bswap64 ); \
|
||||
M2 = _mm256_shuffle_epi8( *(buf+ 2), shuf_bswap64 ); \
|
||||
M3 = _mm256_shuffle_epi8( *(buf+ 3), shuf_bswap64 ); \
|
||||
M4 = _mm256_shuffle_epi8( *(buf+ 4), shuf_bswap64 ); \
|
||||
M5 = _mm256_shuffle_epi8( *(buf+ 5), shuf_bswap64 ); \
|
||||
M6 = _mm256_shuffle_epi8( *(buf+ 6), shuf_bswap64 ); \
|
||||
M7 = _mm256_shuffle_epi8( *(buf+ 7), shuf_bswap64 ); \
|
||||
M8 = _mm256_shuffle_epi8( *(buf+ 8), shuf_bswap64 ); \
|
||||
M9 = _mm256_shuffle_epi8( *(buf+ 9), shuf_bswap64 ); \
|
||||
MA = _mm256_shuffle_epi8( *(buf+10), shuf_bswap64 ); \
|
||||
MB = _mm256_shuffle_epi8( *(buf+11), shuf_bswap64 ); \
|
||||
MC = _mm256_shuffle_epi8( *(buf+12), shuf_bswap64 ); \
|
||||
MD = _mm256_shuffle_epi8( *(buf+13), shuf_bswap64 ); \
|
||||
ME = _mm256_shuffle_epi8( *(buf+14), shuf_bswap64 ); \
|
||||
MF = _mm256_shuffle_epi8( *(buf+15), shuf_bswap64 ); \
|
||||
ROUND_B_4WAY(0); \
|
||||
ROUND_B_4WAY(1); \
|
||||
ROUND_B_4WAY(2); \
|
||||
ROUND_B_4WAY(3); \
|
||||
ROUND_B_4WAY(4); \
|
||||
ROUND_B_4WAY(5); \
|
||||
ROUND_B_4WAY(6); \
|
||||
ROUND_B_4WAY(7); \
|
||||
ROUND_B_4WAY(8); \
|
||||
ROUND_B_4WAY(9); \
|
||||
ROUND_B_4WAY(0); \
|
||||
ROUND_B_4WAY(1); \
|
||||
ROUND_B_4WAY(2); \
|
||||
ROUND_B_4WAY(3); \
|
||||
ROUND_B_4WAY(4); \
|
||||
ROUND_B_4WAY(5); \
|
||||
H0 = mm256_xor4( V8, V0, S0, H0 ); \
|
||||
H1 = mm256_xor4( V9, V1, S1, H1 ); \
|
||||
H2 = mm256_xor4( VA, V2, S2, H2 ); \
|
||||
H3 = mm256_xor4( VB, V3, S3, H3 ); \
|
||||
H4 = mm256_xor4( VC, V4, S0, H4 ); \
|
||||
H5 = mm256_xor4( VD, V5, S1, H5 ); \
|
||||
H6 = mm256_xor4( VE, V6, S2, H6 ); \
|
||||
H7 = mm256_xor4( VF, V7, S3, H7 ); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
static const sph_u64 salt_zero_big[4] = { 0, 0, 0, 0 };
|
||||
|
||||
static void
|
||||
blake64_4way_init( blake_4way_big_context *sc, const sph_u64 *iv,
|
||||
const sph_u64 *salt )
|
||||
{
|
||||
__m256i zero = m256_zero;
|
||||
casti_m256i( sc->H, 0 ) = _mm256_set1_epi64x( iv[0] );
|
||||
casti_m256i( sc->H, 1 ) = _mm256_set1_epi64x( iv[1] );
|
||||
casti_m256i( sc->H, 2 ) = _mm256_set1_epi64x( iv[2] );
|
||||
casti_m256i( sc->H, 3 ) = _mm256_set1_epi64x( iv[3] );
|
||||
casti_m256i( sc->H, 4 ) = _mm256_set1_epi64x( iv[4] );
|
||||
casti_m256i( sc->H, 5 ) = _mm256_set1_epi64x( iv[5] );
|
||||
casti_m256i( sc->H, 6 ) = _mm256_set1_epi64x( iv[6] );
|
||||
casti_m256i( sc->H, 7 ) = _mm256_set1_epi64x( iv[7] );
|
||||
|
||||
casti_m256i( sc->S, 0 ) = zero;
|
||||
casti_m256i( sc->S, 1 ) = zero;
|
||||
casti_m256i( sc->S, 2 ) = zero;
|
||||
casti_m256i( sc->S, 3 ) = zero;
|
||||
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
blake64_4way( blake_4way_big_context *sc, const void *data, size_t len)
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buf;
|
||||
size_t ptr;
|
||||
DECL_STATE64_4WAY
|
||||
|
||||
const int buf_size = 128; // sizeof/8
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
if ( len < (buf_size - ptr) )
|
||||
{
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE64_4WAY(sc);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
if (ptr == buf_size )
|
||||
{
|
||||
if ((T0 = SPH_T64(T0 + 1024)) < 1024)
|
||||
T1 = SPH_T64(T1 + 1);
|
||||
COMPRESS64_4WAY;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE64_4WAY(sc);
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
blake64_4way_close( blake_4way_big_context *sc,
|
||||
unsigned ub, unsigned n, void *dst, size_t out_size_w64)
|
||||
{
|
||||
__m256i buf[16];
|
||||
size_t ptr;
|
||||
unsigned bit_len;
|
||||
uint64_t z, zz;
|
||||
sph_u64 th, tl;
|
||||
|
||||
ptr = sc->ptr;
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
z = 0x80 >> n;
|
||||
zz = ((ub & -z) | z) & 0xFF;
|
||||
buf[ptr>>3] = _mm256_set_epi64x( zz, zz, zz, zz );
|
||||
tl = sc->T0 + bit_len;
|
||||
th = sc->T1;
|
||||
if (ptr == 0 )
|
||||
{
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
|
||||
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
|
||||
}
|
||||
else if ( sc->T0 == 0 )
|
||||
{
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL) + bit_len;
|
||||
sc->T1 = SPH_T64(sc->T1 - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
sc->T0 -= 1024 - bit_len;
|
||||
}
|
||||
if ( ptr <= 104 )
|
||||
{
|
||||
memset_zero_256( buf + (ptr>>3) + 1, (104-ptr) >> 3 );
|
||||
if ( out_size_w64 == 8 )
|
||||
buf[(104>>3)] = _mm256_or_si256( buf[(104>>3)],
|
||||
_mm256_set1_epi64x( 0x0100000000000000ULL ) );
|
||||
*(buf+(112>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( th, th, th, th ) );
|
||||
*(buf+(120>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( tl, tl, tl, tl ) );
|
||||
|
||||
blake64_4way( sc, buf + (ptr>>3), 128 - ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_256( buf + (ptr>>3) + 1, (120 - ptr) >> 3 );
|
||||
|
||||
blake64_4way( sc, buf + (ptr>>3), 128 - ptr );
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00ULL);
|
||||
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFFULL);
|
||||
memset_zero_256( buf, 112>>3 );
|
||||
if ( out_size_w64 == 8 )
|
||||
buf[104>>3] = _mm256_set1_epi64x( 0x0100000000000000ULL );
|
||||
*(buf+(112>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( th, th, th, th ) );
|
||||
*(buf+(120>>3)) = mm256_bswap_64(
|
||||
_mm256_set_epi64x( tl, tl, tl, tl ) );
|
||||
|
||||
blake64_4way( sc, buf, 128 );
|
||||
}
|
||||
mm256_block_bswap_64( (__m256i*)dst, sc->H );
|
||||
}
|
||||
|
||||
void
|
||||
blake512_4way_init(void *cc)
|
||||
{
|
||||
blake64_4way_init(cc, IV512, salt_zero_big);
|
||||
}
|
||||
|
||||
void
|
||||
blake512_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
blake64_4way(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
blake512_4way_close(void *cc, void *dst)
|
||||
{
|
||||
blake512_4way_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
void
|
||||
blake512_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
blake64_4way_close(cc, ub, n, dst, 8);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -17,11 +17,11 @@ void blakecoin_4way_hash(void *state, const void *input)
|
||||
blake256r8_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r8_4way_close( &ctx, vhash );
|
||||
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
@@ -29,41 +29,34 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
if ( opt_benchmark )
|
||||
HTarget = 0x7f;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake256r8_4way_init( &blakecoin_4w_ctx );
|
||||
blake256r8_4way( &blakecoin_4w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
blakecoin_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -81,13 +74,12 @@ void blakecoin_8way_hash( void *state, const void *input )
|
||||
blake256r8_8way( &ctx, input + (64<<3), 16 );
|
||||
blake256r8_8way_close( &ctx, vhash );
|
||||
|
||||
mm256_deinterleave_8x32( state, state+ 32, state+ 64, state+ 96,
|
||||
state+128, state+160, state+192, state+224,
|
||||
vhash, 256 );
|
||||
dintrlv_8x32( state, state+ 32, state+ 64, state+ 96, state+128,
|
||||
state+160, state+192, state+224, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
@@ -95,46 +87,34 @@ int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
int num_found = 0;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
if ( opt_benchmark )
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
blake256r8_8way_init( &blakecoin_8w_ctx );
|
||||
blake256r8_8way( &blakecoin_8w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
blakecoin_8way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -13,18 +13,18 @@
|
||||
|
||||
#if defined (BLAKECOIN_8WAY)
|
||||
void blakecoin_8way_hash(void *state, const void *input);
|
||||
int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
#if defined (BLAKECOIN_4WAY)
|
||||
void blakecoin_4way_hash(void *state, const void *input);
|
||||
int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void blakecoinhash( void *state, const void *input );
|
||||
int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
@@ -39,13 +39,14 @@ void blakecoinhash( void *state, const void *input )
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t _ALIGN(32) endiandata[20];
|
||||
|
@@ -23,11 +23,11 @@ void decred_hash_4way( void *state, const void *input )
|
||||
memcpy( &ctx, &blake_mid, sizeof(blake_mid) );
|
||||
blake256_4way( &ctx, tail, tail_len );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_decred_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[48*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
@@ -37,14 +37,13 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
// copy to buffer guaranteed to be aligned.
|
||||
memcpy( edata, pdata, 180 );
|
||||
|
||||
// use the old way until new way updated for size.
|
||||
mm_interleave_4x32x( vdata, edata, edata, edata, edata, 180*8 );
|
||||
mm128_intrlv_4x32x( vdata, edata, edata, edata, edata, 180*8 );
|
||||
|
||||
blake256_4way_init( &blake_mid );
|
||||
blake256_4way( &blake_mid, vdata, DECRED_MIDSTATE_LEN );
|
||||
@@ -59,18 +58,17 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
decred_hash_4way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= HTarget )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[DECRED_NONCE_INDEX] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -140,6 +140,7 @@ bool decred_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
return true;
|
||||
}
|
||||
|
||||
int decred_get_work_data_size() { return DECRED_DATA_SIZE; }
|
||||
|
||||
bool register_decred_algo( algo_gate_t* gate )
|
||||
{
|
||||
@@ -154,7 +155,7 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
|
||||
gate->display_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->decode_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
@@ -163,7 +164,7 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
gate->nbits_index = DECRED_NBITS_INDEX;
|
||||
gate->ntime_index = DECRED_NTIME_INDEX;
|
||||
gate->nonce_index = DECRED_NONCE_INDEX;
|
||||
gate->work_data_size = DECRED_DATA_SIZE;
|
||||
gate->get_work_data_size = (void*)&decred_get_work_data_size;
|
||||
gate->work_cmp_size = DECRED_WORK_COMPARE_SIZE;
|
||||
allow_mininginfo = false;
|
||||
have_gbt = false;
|
||||
|
@@ -14,7 +14,7 @@
|
||||
|
||||
#if defined (__AVX2__)
|
||||
//void blakehash_84way(void *state, const void *input);
|
||||
//int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
//int scanhash_blake_8way( struct work *work, uint32_t max_nonce,
|
||||
// uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
@@ -24,13 +24,13 @@
|
||||
|
||||
#if defined (DECRED_4WAY)
|
||||
void decred_hash_4way(void *state, const void *input);
|
||||
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_decred_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void decred_hash( void *state, const void *input );
|
||||
int scanhash_decred( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_decred( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -52,12 +52,14 @@ void decred_hash_simple(void *state, const void *input)
|
||||
sph_blake256_close(&ctx, state);
|
||||
}
|
||||
|
||||
int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_decred( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[48];
|
||||
uint32_t _ALIGN(64) hash32[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
// #define DCR_NONCE_OFT32 35
|
||||
|
||||
@@ -268,7 +270,7 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&decred_hash;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
|
||||
gate->display_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->decode_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
|
@@ -10,13 +10,8 @@
|
||||
#include "blake-hash-4way.h"
|
||||
#include "sph_blake.h"
|
||||
|
||||
//#define DEBUG_ALGO
|
||||
|
||||
extern void pentablakehash_4way( void *output, const void *input )
|
||||
{
|
||||
unsigned char _ALIGN(32) hash[128];
|
||||
// // same as uint32_t hashA[16], hashB[16];
|
||||
// #define hashB hash+64
|
||||
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
@@ -30,21 +25,6 @@ extern void pentablakehash_4way( void *output, const void *input )
|
||||
blake512_4way( &ctx, input, 80 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
uint64_t sin0[10], sin1[10], sin2[10], sin3[10];
|
||||
mm256_deinterleave_4x64( sin0, sin1, sin2, sin3, input, 640 );
|
||||
sph_blake512_context ctx2_blake;
|
||||
sph_blake512_init(&ctx2_blake);
|
||||
sph_blake512(&ctx2_blake, sin0, 80);
|
||||
sph_blake512_close(&ctx2_blake, (void*) hash);
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
uint64_t* hash64 = (uint64_t*)hash;
|
||||
for( int i = 0; i < 8; i++ )
|
||||
{
|
||||
if ( hash0[i] != hash64[i] )
|
||||
printf("hash mismatch %u\n",i);
|
||||
}
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
@@ -61,46 +41,14 @@ for( int i = 0; i < 8; i++ )
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
|
||||
/*
|
||||
uint64_t sin0[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin1[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin2[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin3[10] __attribute__ ((aligned (64)));
|
||||
|
||||
sph_blake512_context ctx_blake;
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, input, 80);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
*/
|
||||
}
|
||||
|
||||
int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done )
|
||||
int scanhash_pentablake_4way( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
@@ -110,9 +58,8 @@ int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
// uint32_t _ALIGN(32) hash64[8];
|
||||
// uint32_t _ALIGN(32) endiandata[32];
|
||||
@@ -138,7 +85,7 @@ int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
{
|
||||
@@ -155,10 +102,10 @@ int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( !( (hash+(i<<3))[7] & mask )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
pdata[19] = n + i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
|
@@ -10,12 +10,12 @@
|
||||
|
||||
#if defined(PENTABLAKE_4WAY)
|
||||
void pentablakehash_4way( void *state, const void *input );
|
||||
int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done );
|
||||
int scanhash_pentablake_4way( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void pentablakehash( void *state, const void *input );
|
||||
int scanhash_pentablake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_pentablake( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
|
@@ -40,8 +40,8 @@ extern void pentablakehash(void *output, const void *input)
|
||||
|
||||
}
|
||||
|
||||
int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_pentablake( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -49,6 +49,7 @@ int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t _ALIGN(32) endiandata[32];
|
||||
|
@@ -41,15 +41,18 @@ extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#define SPH_SIZE_bmw256 256
|
||||
|
||||
#define SPH_SIZE_bmw512 512
|
||||
|
||||
#if defined(__SSE2__)
|
||||
|
||||
// BMW-256 4 way 32
|
||||
|
||||
typedef struct {
|
||||
__m128i buf[64];
|
||||
__m128i H[16];
|
||||
@@ -59,6 +62,60 @@ typedef struct {
|
||||
|
||||
typedef bmw_4way_small_context bmw256_4way_context;
|
||||
|
||||
void bmw256_4way_init(void *cc);
|
||||
|
||||
void bmw256_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void bmw256_4way_close(void *cc, void *dst);
|
||||
|
||||
void bmw256_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif // __SSE2__
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// BMW-256 8 way 32
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[64];
|
||||
__m256i H[16];
|
||||
size_t ptr;
|
||||
uint32_t bit_count; // assume bit_count fits in 32 bits
|
||||
} bmw_8way_small_context __attribute__ ((aligned (64)));
|
||||
|
||||
typedef bmw_8way_small_context bmw256_8way_context;
|
||||
|
||||
void bmw256_8way_init( bmw256_8way_context *ctx );
|
||||
void bmw256_8way( bmw256_8way_context *ctx, const void *data, size_t len );
|
||||
void bmw256_8way_close( bmw256_8way_context *ctx, void *dst );
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__SSE2__)
|
||||
|
||||
// BMW-512 2 way 64
|
||||
|
||||
typedef struct {
|
||||
__m128i buf[16];
|
||||
__m128i H[16];
|
||||
size_t ptr;
|
||||
uint64_t bit_count;
|
||||
} bmw_2way_big_context __attribute__ ((aligned (64)));
|
||||
|
||||
typedef bmw_2way_big_context bmw512_2way_context;
|
||||
|
||||
void bmw512_2way_init( bmw512_2way_context *ctx );
|
||||
void bmw512_2way( bmw512_2way_context *ctx, const void *data, size_t len );
|
||||
void bmw512_2way_close( bmw512_2way_context *ctx, void *dst );
|
||||
|
||||
#endif // __SSE2__
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// BMW-512 4 way 64
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16];
|
||||
__m256i H[16];
|
||||
@@ -68,14 +125,6 @@ typedef struct {
|
||||
|
||||
typedef bmw_4way_big_context bmw512_4way_context;
|
||||
|
||||
void bmw256_4way_init(void *cc);
|
||||
|
||||
void bmw256_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void bmw256_4way_close(void *cc, void *dst);
|
||||
|
||||
void bmw256_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void bmw512_4way_init(void *cc);
|
||||
|
||||
@@ -86,10 +135,10 @@ void bmw512_4way_close(void *cc, void *dst);
|
||||
void bmw512_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
#endif // __AVX2__
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif // BMW_HASH_H__
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -19,14 +19,15 @@ void bmwhash(void *output, const void *input)
|
||||
*/
|
||||
}
|
||||
|
||||
int scanhash_bmw(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_bmw( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) hash64[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
int thr_id = mythr->id;
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
59
algo/bmw/bmw512-4way.c
Normal file
59
algo/bmw/bmw512-4way.c
Normal file
@@ -0,0 +1,59 @@
|
||||
#include "bmw512-gate.h"
|
||||
|
||||
#ifdef BMW512_4WAY
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
//#include "sph_keccak.h"
|
||||
#include "bmw-hash-4way.h"
|
||||
|
||||
void bmw512hash_4way(void *state, const void *input)
|
||||
{
|
||||
bmw512_4way_context ctx;
|
||||
bmw512_4way_init( &ctx );
|
||||
bmw512_4way( &ctx, input, 80 );
|
||||
bmw512_4way_close( &ctx, state );
|
||||
}
|
||||
|
||||
int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[25]); // 3*8+1
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
// const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
do {
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
bmw512hash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( ( ( hash7[ lane<<1 ] & 0xFFFFFF00 ) == 0 ) )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
20
algo/bmw/bmw512-gate.c
Normal file
20
algo/bmw/bmw512-gate.c
Normal file
@@ -0,0 +1,20 @@
|
||||
#include "bmw512-gate.h"
|
||||
|
||||
int64_t bmw512_get_max64() { return 0x7ffffLL; }
|
||||
|
||||
bool register_bmw512_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
gate->get_max64 = (void*)&bmw512_get_max64;
|
||||
#if defined (BMW512_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_bmw512_4way;
|
||||
gate->hash = (void*)&bmw512hash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_bmw512;
|
||||
gate->hash = (void*)&bmw512hash;
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
|
||||
|
23
algo/bmw/bmw512-gate.h
Normal file
23
algo/bmw/bmw512-gate.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#ifndef BMW512_GATE_H__
|
||||
#define BMW512_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define BMW512_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(BMW512_4WAY)
|
||||
|
||||
void bmw512hash_4way( void *state, const void *input );
|
||||
int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
void bmw512hash( void *state, const void *input );
|
||||
int scanhash_bmw512( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
1073
algo/bmw/bmw512-hash-4way.c
Normal file
1073
algo/bmw/bmw512-hash-4way.c
Normal file
File diff suppressed because it is too large
Load Diff
53
algo/bmw/bmw512.c
Normal file
53
algo/bmw/bmw512.c
Normal file
@@ -0,0 +1,53 @@
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "sph_bmw.h"
|
||||
|
||||
void bmw512hash(void *state, const void *input)
|
||||
{
|
||||
sph_bmw512_context ctx;
|
||||
uint32_t hash[32];
|
||||
|
||||
sph_bmw512_init( &ctx );
|
||||
sph_bmw512( &ctx,input, 80 );
|
||||
sph_bmw512_close( &ctx, hash );
|
||||
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_bmw512( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t endiandata[32];
|
||||
|
||||
for (int i=0; i < 19; i++)
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
|
||||
do {
|
||||
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
bmw512hash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFFF00)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -242,6 +242,8 @@ void cryptolight_hash(void* output, const void* input, int len) {
|
||||
free(ctx);
|
||||
}
|
||||
|
||||
#if defined(__AES__)
|
||||
|
||||
static void cryptolight_hash_ctx_aes_ni(void* output, const void* input,
|
||||
int len, struct cryptonight_ctx* ctx)
|
||||
{
|
||||
@@ -312,8 +314,10 @@ static void cryptolight_hash_ctx_aes_ni(void* output, const void* input,
|
||||
oaes_free((OAES_CTX **) &ctx->aes_ctx);
|
||||
}
|
||||
|
||||
int scanhash_cryptolight(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
#endif
|
||||
|
||||
int scanhash_cryptolight( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -322,10 +326,11 @@ int scanhash_cryptolight(int thr_id, struct work *work,
|
||||
const uint32_t first_nonce = n + 1;
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
uint32_t _ALIGN(32) hash[HASH_SIZE / 4];
|
||||
int thr_id = mythr->id;
|
||||
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#if defined(__AES__)
|
||||
do {
|
||||
*nonceptr = ++n;
|
||||
cryptolight_hash_ctx_aes_ni(hash, pdata, 76, ctx);
|
||||
|
@@ -1,14 +1,11 @@
|
||||
#if defined(__AES__)
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <memory.h>
|
||||
#include "cryptonight.h"
|
||||
#include "miner.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include <immintrin.h>
|
||||
//#include "avxdefs.h"
|
||||
|
||||
void aesni_parallel_noxor(uint8_t *long_state, uint8_t *text, uint8_t *ExpandedKey);
|
||||
void aesni_parallel_xor(uint8_t *text, uint8_t *ExpandedKey, uint8_t *long_state);
|
||||
void that_fucking_loop(uint8_t a[16], uint8_t b[16], uint8_t *long_state);
|
||||
|
||||
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
|
||||
{
|
||||
@@ -25,7 +22,6 @@ static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
|
||||
|
||||
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
__m128i tmp2, tmp4;
|
||||
|
||||
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
|
||||
@@ -37,14 +33,12 @@ static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Special thanks to Intel for helping me
|
||||
// with ExpandAESKey256() and its subroutines
|
||||
static inline void ExpandAESKey256(char *keybuf)
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
__m128i tmp1, tmp2, tmp3, *keys;
|
||||
|
||||
keys = (__m128i *)keybuf;
|
||||
@@ -91,7 +85,6 @@ static inline void ExpandAESKey256(char *keybuf)
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[14] = tmp1;
|
||||
#endif
|
||||
}
|
||||
|
||||
// align to 64 byte cache line
|
||||
@@ -109,13 +102,19 @@ static __thread cryptonight_ctx ctx;
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
|
||||
uint8_t ExpandedKey[256] __attribute__((aligned(64)));
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
size_t i, j;
|
||||
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
|
||||
if ( cryptonightV7 && len < 43 )
|
||||
return;
|
||||
|
||||
const uint64_t tweak = cryptonightV7
|
||||
? *((const uint64_t*) (((const uint8_t*)input) + 35))
|
||||
^ ctx.state.hs.w[24] : 0;
|
||||
|
||||
memcpy( ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||
@@ -214,7 +213,15 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
_mm_store_si128( (__m128i*)c, c_x );
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
_mm_store_si128( lsa, b_x );
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
@@ -227,10 +234,14 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
: "cc" );
|
||||
|
||||
b_x = c_x;
|
||||
nextblock[0] = a[0] + hi;
|
||||
nextblock[1] = a[1] + lo;
|
||||
a[0] = b[0] ^ nextblock[0];
|
||||
a[1] = b[1] ^ nextblock[1];
|
||||
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
nextblock[0] = a[0];
|
||||
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
|
||||
lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||
a_x = _mm_load_si128( (__m128i*)a );
|
||||
c_x = _mm_load_si128( lsa );
|
||||
@@ -241,6 +252,14 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
@@ -251,8 +270,12 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
"rm" ( b[0] )
|
||||
: "cc" );
|
||||
|
||||
nextblock[0] = a[0] + hi;
|
||||
nextblock[1] = a[1] + lo;
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
nextblock[0] = a[0];
|
||||
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
|
||||
memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
@@ -330,5 +353,5 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
@@ -7,11 +7,11 @@
|
||||
#include "cpuminer-config.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
#endif
|
||||
|
||||
#else
|
||||
#include "crypto/c_groestl.h"
|
||||
#endif
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
@@ -30,12 +30,12 @@ void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
}
|
||||
|
||||
void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
#ifdef NO_AES_NI
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
#else
|
||||
#if defined(__AES__)
|
||||
hashState_groestl256 ctx;
|
||||
init_groestl256( &ctx, 32 );
|
||||
update_and_final_groestl256( &ctx, output, input, len * 8 );
|
||||
#else
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -52,34 +52,41 @@ void (* const extra_hashes[4])( const void *, size_t, char *) =
|
||||
|
||||
void cryptonight_hash( void *restrict output, const void *input, int len )
|
||||
{
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
cryptonight_hash_ctx ( output, input, len );
|
||||
#else
|
||||
#if defined(__AES__)
|
||||
cryptonight_hash_aes( output, input, len );
|
||||
#else
|
||||
cryptonight_hash_ctx ( output, input, len );
|
||||
#endif
|
||||
}
|
||||
|
||||
void cryptonight_hash_suw( void *restrict output, const void *input )
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
cryptonight_hash_ctx ( output, input, 76 );
|
||||
#else
|
||||
#if defined(__AES__)
|
||||
cryptonight_hash_aes( output, input, 76 );
|
||||
#else
|
||||
cryptonight_hash_ctx ( output, input, 76 );
|
||||
#endif
|
||||
}
|
||||
|
||||
int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
bool cryptonightV7 = false;
|
||||
|
||||
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
|
||||
uint32_t n = *nonceptr - 1;
|
||||
const uint32_t first_nonce = n + 1;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t hash[32 / 4] __attribute__((aligned(32)));
|
||||
|
||||
// if ( ( cryptonightV7 && ( *(uint8_t*)pdata < 7 ) )
|
||||
// || ( !cryptonightV7 && ( *(uint8_t*)pdata == 7 ) ) )
|
||||
// applog(LOG_WARNING,"Cryptonight variant mismatch, shares may be rejected.");
|
||||
|
||||
do
|
||||
{
|
||||
*nonceptr = ++n;
|
||||
@@ -87,6 +94,7 @@ int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if (unlikely( hash[7] < Htarg ))
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
// work_set_target_ratio( work, hash );
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
@@ -97,6 +105,7 @@ int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
bool register_cryptonight_algo( algo_gate_t* gate )
|
||||
{
|
||||
cryptonightV7 = false;
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
@@ -106,3 +115,15 @@ bool register_cryptonight_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
};
|
||||
|
||||
bool register_cryptonightv7_algo( algo_gate_t* gate )
|
||||
{
|
||||
cryptonightV7 = true;
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
gate->hash = (void*)&cryptonight_hash;
|
||||
gate->hash_suw = (void*)&cryptonight_hash_suw;
|
||||
gate->get_max64 = (void*)&get_max64_0x40LL;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -20,8 +20,8 @@
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "crypto/int-util.h"
|
||||
#include "crypto/hash-ops.h"
|
||||
//#include "cryptonight.h"
|
||||
//#include "crypto/hash-ops.h"
|
||||
#include "cryptonight.h"
|
||||
|
||||
#if USE_INT128
|
||||
|
||||
@@ -51,6 +51,7 @@ typedef __uint128_t uint128_t;
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
|
||||
|
||||
/*
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
@@ -78,6 +79,7 @@ static void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SKEIN_SUCCESS == r));
|
||||
}
|
||||
*/
|
||||
|
||||
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
@@ -120,9 +122,11 @@ static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* pro
|
||||
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
|
||||
#endif
|
||||
|
||||
/*
|
||||
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
|
||||
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
|
||||
};
|
||||
*/
|
||||
|
||||
static inline size_t e2i(const uint8_t* a) {
|
||||
#if !LITE
|
||||
@@ -132,14 +136,16 @@ static inline size_t e2i(const uint8_t* a) {
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
|
||||
static inline void mul_sum_xor_dst( const uint8_t* a, uint8_t* c, uint8_t* dst,
|
||||
const uint64_t tweak )
|
||||
{
|
||||
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
|
||||
hi += ((uint64_t*) c)[0];
|
||||
|
||||
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
|
||||
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
|
||||
((uint64_t*) dst)[0] = hi;
|
||||
((uint64_t*) dst)[1] = lo;
|
||||
((uint64_t*) dst)[1] = cryptonightV7 ? lo ^ tweak : lo;
|
||||
}
|
||||
|
||||
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
|
||||
@@ -174,8 +180,16 @@ static __thread cryptonight_ctx ctx;
|
||||
|
||||
void cryptonight_hash_ctx(void* output, const void* input, int len)
|
||||
{
|
||||
hash_process(&ctx.state.hs, (const uint8_t*) input, len);
|
||||
ctx.aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
// hash_process(&ctx.state.hs, (const uint8_t*) input, len);
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
|
||||
if ( cryptonightV7 && len < 43 )
|
||||
return;
|
||||
const uint64_t tweak = cryptonightV7
|
||||
? *((const uint64_t*) (((const uint8_t*)input) + 35))
|
||||
^ ctx.state.hs.w[24] : 0;
|
||||
|
||||
ctx.aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
|
||||
__builtin_prefetch( ctx.text, 0, 3 );
|
||||
__builtin_prefetch( ctx.text + 64, 0, 3 );
|
||||
@@ -211,23 +225,44 @@ void cryptonight_hash_ctx(void* output, const void* input, int len)
|
||||
xor_blocks_dst(&ctx.state.k[0], &ctx.state.k[32], ctx.a);
|
||||
xor_blocks_dst(&ctx.state.k[16], &ctx.state.k[48], ctx.b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.c, ctx.a);
|
||||
xor_blocks_dst(ctx.c, ctx.b, &ctx.long_state[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx.c, ctx.a, &ctx.long_state[e2i(ctx.c)]);
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.b, ctx.a);
|
||||
xor_blocks_dst(ctx.b, ctx.c, &ctx.long_state[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx.b, ctx.a, &ctx.long_state[e2i(ctx.b)]);
|
||||
for (i = 0; likely(i < ITER / 4); ++i)
|
||||
{
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.c, ctx.a);
|
||||
xor_blocks_dst(ctx.c, ctx.b, &ctx.long_state[j]);
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
|
||||
const uint8_t tmp = lsa[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx.c, ctx.a, &ctx.long_state[e2i(ctx.c)], tweak );
|
||||
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.b, ctx.a);
|
||||
xor_blocks_dst(ctx.b, ctx.c, &ctx.long_state[j]);
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
|
||||
const uint8_t tmp = lsa[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx.b, ctx.a, &ctx.long_state[e2i(ctx.b)], tweak );
|
||||
|
||||
}
|
||||
|
||||
__builtin_prefetch( ctx.text, 0, 3 );
|
||||
@@ -266,7 +301,8 @@ void cryptonight_hash_ctx(void* output, const void* input, int len)
|
||||
aesb_pseudo_round_mut(&ctx.text[7 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||
hash_permutation(&ctx.state.hs);
|
||||
// hash_permutation(&ctx.state.hs);
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx.aes_ctx);
|
||||
|
@@ -40,10 +40,12 @@ void cryptonight_hash_ctx(void* output, const void* input, int len);
|
||||
void keccakf(uint64_t st[25], int rounds);
|
||||
extern void (* const extra_hashes[4])(const void *, size_t, char *);
|
||||
|
||||
int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len );
|
||||
|
||||
extern bool cryptonightV7;
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -7,12 +7,32 @@
|
||||
|
||||
// 2x128
|
||||
|
||||
/*
|
||||
// The result of hashing 10 rounds of initial data which consists of params
|
||||
// zero padded.
|
||||
static const uint64_t IV256[] =
|
||||
{
|
||||
0xCCD6F29FEA2BD4B4, 0x35481EAE63117E71, 0xE5D94E6322512D5B, 0xF4CC12BE7E624131,
|
||||
0x42AF2070C2D0B696, 0x3361DA8CD0720C35, 0x8EF8AD8328CCECA4, 0x40E5FBAB4680AC00,
|
||||
0x6107FBD5D89041C3, 0xF0B266796C859D41, 0x5FA2560309392549, 0x93CB628565C892FD,
|
||||
0x9E4B4E602AF2B5AE, 0x85254725774ABFDD, 0x4AB6AAD615815AEB, 0xD6032C0A9CDAF8AF
|
||||
};
|
||||
|
||||
static const uint64_t IV512[] =
|
||||
{
|
||||
0x50F494D42AEA2A61, 0x4167D83E2D538B8B, 0xC701CF8C3FEE2313, 0x50AC5695CC39968E,
|
||||
0xA647A8B34D42C787, 0x825B453797CF0BEF, 0xF22090C4EEF864D2, 0xA23911AED0E5CD33,
|
||||
0x148FE485FCD398D9, 0xB64445321B017BEF, 0x2FF5781C6A536159, 0x0DBADEA991FA7934,
|
||||
0xA5A70E75D65C8A2B, 0xBC796576B1C62456, 0xE7989AF11921C8F7, 0xD43E3B447795D246
|
||||
};
|
||||
*/
|
||||
|
||||
static void transform_2way( cube_2way_context *sp )
|
||||
{
|
||||
int r;
|
||||
const int rounds = sp->rounds;
|
||||
|
||||
__m256i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3;
|
||||
__m256i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1;
|
||||
|
||||
x0 = _mm256_load_si256( (__m256i*)sp->h );
|
||||
x1 = _mm256_load_si256( (__m256i*)sp->h + 1 );
|
||||
@@ -29,50 +49,38 @@ static void transform_2way( cube_2way_context *sp )
|
||||
x5 = _mm256_add_epi32( x1, x5 );
|
||||
x6 = _mm256_add_epi32( x2, x6 );
|
||||
x7 = _mm256_add_epi32( x3, x7 );
|
||||
y0 = x2;
|
||||
y1 = x3;
|
||||
y2 = x0;
|
||||
y3 = x1;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
|
||||
_mm256_srli_epi32( y0, 25 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 7 ),
|
||||
_mm256_srli_epi32( y1, 25 ) );
|
||||
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 7 ),
|
||||
_mm256_srli_epi32( y2, 25 ) );
|
||||
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 7 ),
|
||||
_mm256_srli_epi32( y3, 25 ) );
|
||||
y0 = x0;
|
||||
y1 = x1;
|
||||
x0 = mm256_rol_32( x2, 7 );
|
||||
x1 = mm256_rol_32( x3, 7 );
|
||||
x2 = mm256_rol_32( y0, 7 );
|
||||
x3 = mm256_rol_32( y1, 7 );
|
||||
x0 = _mm256_xor_si256( x0, x4 );
|
||||
x1 = _mm256_xor_si256( x1, x5 );
|
||||
x2 = _mm256_xor_si256( x2, x6 );
|
||||
x3 = _mm256_xor_si256( x3, x7 );
|
||||
x4 = mm256_swap128_64( x4 );
|
||||
x5 = mm256_swap128_64( x5 );
|
||||
x6 = mm256_swap128_64( x6 );
|
||||
x7 = mm256_swap128_64( x7 );
|
||||
x4 = mm256_swap64_128( x4 );
|
||||
x5 = mm256_swap64_128( x5 );
|
||||
x6 = mm256_swap64_128( x6 );
|
||||
x7 = mm256_swap64_128( x7 );
|
||||
x4 = _mm256_add_epi32( x0, x4 );
|
||||
x5 = _mm256_add_epi32( x1, x5 );
|
||||
x6 = _mm256_add_epi32( x2, x6 );
|
||||
x7 = _mm256_add_epi32( x3, x7 );
|
||||
y0 = x1;
|
||||
y1 = x0;
|
||||
y2 = x3;
|
||||
y3 = x2;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
|
||||
_mm256_srli_epi32( y0, 21 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
|
||||
_mm256_srli_epi32( y1, 21 ) );
|
||||
x2 = _mm256_xor_si256( _mm256_slli_epi32( y2, 11 ),
|
||||
_mm256_srli_epi32( y2, 21 ) );
|
||||
x3 = _mm256_xor_si256( _mm256_slli_epi32( y3, 11 ),
|
||||
_mm256_srli_epi32( y3, 21 ) );
|
||||
y0 = x0;
|
||||
y1 = x2;
|
||||
x0 = mm256_rol_32( x1, 11 );
|
||||
x1 = mm256_rol_32( y0, 11 );
|
||||
x2 = mm256_rol_32( x3, 11 );
|
||||
x3 = mm256_rol_32( y1, 11 );
|
||||
x0 = _mm256_xor_si256( x0, x4 );
|
||||
x1 = _mm256_xor_si256( x1, x5 );
|
||||
x2 = _mm256_xor_si256( x2, x6 );
|
||||
x3 = _mm256_xor_si256( x3, x7 );
|
||||
x4 = mm256_swap64_32( x4 );
|
||||
x5 = mm256_swap64_32( x5 );
|
||||
x6 = mm256_swap64_32( x6 );
|
||||
x7 = mm256_swap64_32( x7 );
|
||||
x4 = mm256_swap32_64( x4 );
|
||||
x5 = mm256_swap32_64( x5 );
|
||||
x6 = mm256_swap32_64( x6 );
|
||||
x7 = mm256_swap32_64( x7 );
|
||||
}
|
||||
|
||||
_mm256_store_si256( (__m256i*)sp->h, x0 );
|
||||
@@ -86,44 +94,50 @@ static void transform_2way( cube_2way_context *sp )
|
||||
|
||||
}
|
||||
|
||||
cube_2way_context cube_2way_ctx_cache __attribute__ ((aligned (64)));
|
||||
|
||||
int cube_2way_reinit( cube_2way_context *sp )
|
||||
{
|
||||
memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
int cube_2way_init( cube_2way_context *sp, int hashbitlen, int rounds,
|
||||
int blockbytes )
|
||||
int blockbytes )
|
||||
{
|
||||
int i;
|
||||
__m128i* h = (__m128i*)sp->h;
|
||||
sp->hashlen = hashbitlen/128;
|
||||
sp->blocksize = blockbytes/16;
|
||||
sp->rounds = rounds;
|
||||
sp->pos = 0;
|
||||
|
||||
// all sizes of __m128i
|
||||
cube_2way_ctx_cache.hashlen = hashbitlen/128;
|
||||
cube_2way_ctx_cache.blocksize = blockbytes/16;
|
||||
cube_2way_ctx_cache.rounds = rounds;
|
||||
cube_2way_ctx_cache.pos = 0;
|
||||
if ( hashbitlen == 512 )
|
||||
{
|
||||
|
||||
for ( i = 0; i < 8; ++i )
|
||||
cube_2way_ctx_cache.h[i] = m256_zero;
|
||||
|
||||
cube_2way_ctx_cache.h[0] = _mm256_set_epi32(
|
||||
0, rounds, blockbytes, hashbitlen / 8,
|
||||
0, rounds, blockbytes, hashbitlen / 8 );
|
||||
|
||||
for ( i = 0; i < 10; ++i )
|
||||
transform_2way( &cube_2way_ctx_cache );
|
||||
|
||||
memcpy( sp, &cube_2way_ctx_cache, sizeof(cube_2way_context) );
|
||||
h[ 0] = m128_const_64( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 );
|
||||
h[ 2] = m128_const_64( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 );
|
||||
h[ 4] = m128_const_64( 0x825B453797CF0BEF, 0xA647A8B34D42C787 );
|
||||
h[ 6] = m128_const_64( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 );
|
||||
h[ 8] = m128_const_64( 0xB64445321B017BEF, 0x148FE485FCD398D9 );
|
||||
h[10] = m128_const_64( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 );
|
||||
h[12] = m128_const_64( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B );
|
||||
h[14] = m128_const_64( 0xD43E3B447795D246, 0xE7989AF11921C8F7 );
|
||||
h[1] = h[ 0]; h[ 3] = h[ 2]; h[ 5] = h[ 4]; h[ 7] = h[ 6];
|
||||
h[9] = h[ 8]; h[11] = h[10]; h[13] = h[12]; h[15] = h[14];
|
||||
}
|
||||
else
|
||||
{
|
||||
h[ 0] = m128_const_64( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 );
|
||||
h[ 2] = m128_const_64( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B );
|
||||
h[ 4] = m128_const_64( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 );
|
||||
h[ 6] = m128_const_64( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 );
|
||||
h[ 8] = m128_const_64( 0xF0B266796C859D41, 0x6107FBD5D89041C3 );
|
||||
h[10] = m128_const_64( 0x93CB628565C892FD, 0x5FA2560309392549 );
|
||||
h[12] = m128_const_64( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE );
|
||||
h[14] = m128_const_64( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB );
|
||||
h[1] = h[ 0]; h[ 3] = h[ 2]; h[ 5] = h[ 4]; h[ 7] = h[ 6];
|
||||
h[9] = h[ 8]; h[11] = h[10]; h[13] = h[12]; h[15] = h[14];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int cube_2way_update( cube_2way_context *sp, const void *data, size_t size )
|
||||
{
|
||||
const int len = size / 16;
|
||||
const int len = size >> 4;
|
||||
const __m256i *in = (__m256i*)data;
|
||||
int i;
|
||||
|
||||
@@ -140,7 +154,6 @@ int cube_2way_update( cube_2way_context *sp, const void *data, size_t size )
|
||||
sp->pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -151,25 +164,22 @@ int cube_2way_close( cube_2way_context *sp, void *output )
|
||||
|
||||
// pos is zero for 64 byte data, 1 for 80 byte data.
|
||||
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
|
||||
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
|
||||
_mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 ) );
|
||||
transform_2way( sp );
|
||||
|
||||
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
|
||||
1,0,0,0 ) );
|
||||
for ( i = 0; i < 10; ++i )
|
||||
transform_2way( &cube_2way_ctx_cache );
|
||||
sp->h[7] = _mm256_xor_si256( sp->h[7],
|
||||
_mm256_set_epi32( 1,0,0,0, 1,0,0,0 ) );
|
||||
|
||||
for ( i = 0; i < sp->hashlen; i++ )
|
||||
hash[i] = sp->h[i];
|
||||
for ( i = 0; i < 10; ++i ) transform_2way( sp );
|
||||
|
||||
memcpy( hash, sp->h, sp->hashlen<<5 );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cube_2way_update_close( cube_2way_context *sp, void *output,
|
||||
const void *data, size_t size )
|
||||
{
|
||||
const int len = size / 16;
|
||||
const int len = size >> 4;
|
||||
const __m256i *in = (__m256i*)data;
|
||||
__m256i *hash = (__m256i*)output;
|
||||
int i;
|
||||
@@ -187,18 +197,15 @@ int cube_2way_update_close( cube_2way_context *sp, void *output,
|
||||
|
||||
// pos is zero for 64 byte data, 1 for 80 byte data.
|
||||
sp->h[ sp->pos ] = _mm256_xor_si256( sp->h[ sp->pos ],
|
||||
_mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x80 ) );
|
||||
_mm256_set_epi32( 0,0,0,0x80, 0,0,0,0x80 ) );
|
||||
transform_2way( sp );
|
||||
|
||||
sp->h[7] = _mm256_xor_si256( sp->h[7], _mm256_set_epi32( 1,0,0,0,
|
||||
1,0,0,0 ) );
|
||||
for ( i = 0; i < 10; ++i )
|
||||
transform_2way( &cube_2way_ctx_cache );
|
||||
|
||||
for ( i = 0; i < sp->hashlen; i++ )
|
||||
hash[i] = sp->h[i];
|
||||
for ( i = 0; i < 10; ++i ) transform_2way( sp );
|
||||
|
||||
memcpy( hash, sp->h, sp->hashlen<<5 );
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -4,18 +4,18 @@
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#include <stdint.h>
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
// 2x128, 2 way parallel SSE2
|
||||
|
||||
struct _cube_2way_context
|
||||
{
|
||||
__m256i h[8];
|
||||
int hashlen; // __m128i
|
||||
int rounds;
|
||||
int blocksize; // __m128i
|
||||
int pos; // number of __m128i read into x from current block
|
||||
__m256i h[8] __attribute__ ((aligned (64)));
|
||||
};
|
||||
} __attribute__ ((aligned (64)));
|
||||
|
||||
typedef struct _cube_2way_context cube_2way_context;
|
||||
|
||||
|
@@ -13,7 +13,8 @@
|
||||
#include <stdbool.h>
|
||||
#include <unistd.h>
|
||||
#include <memory.h>
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
#include <stdio.h>
|
||||
|
||||
static void transform( cubehashParam *sp )
|
||||
{
|
||||
@@ -34,26 +35,22 @@ static void transform( cubehashParam *sp )
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = x0;
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( x1, 7 ),
|
||||
_mm256_srli_epi32( x1, 25 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y0, 7 ),
|
||||
_mm256_srli_epi32( y0, 25 ) );
|
||||
x0 = mm256_rol_32( x1, 7 );
|
||||
x1 = mm256_rol_32( y0, 7 );
|
||||
x0 = _mm256_xor_si256( x0, x2 );
|
||||
x1 = _mm256_xor_si256( x1, x3 );
|
||||
x2 = _mm256_shuffle_epi32( x2, 0x4e );
|
||||
x3 = _mm256_shuffle_epi32( x3, 0x4e );
|
||||
x2 = mm256_swap64_128( x2 );
|
||||
x3 = mm256_swap64_128( x3 );
|
||||
x2 = _mm256_add_epi32( x0, x2 );
|
||||
x3 = _mm256_add_epi32( x1, x3 );
|
||||
y0 = _mm256_permute4x64_epi64( x0, 0x4e );
|
||||
y1 = _mm256_permute4x64_epi64( x1, 0x4e );
|
||||
x0 = _mm256_xor_si256( _mm256_slli_epi32( y0, 11 ),
|
||||
_mm256_srli_epi32( y0, 21 ) );
|
||||
x1 = _mm256_xor_si256( _mm256_slli_epi32( y1, 11 ),
|
||||
_mm256_srli_epi32( y1, 21 ) );
|
||||
y0 = mm256_swap_128( x0 );
|
||||
y1 = mm256_swap_128( x1 );
|
||||
x0 = mm256_rol_32( y0, 11 );
|
||||
x1 = mm256_rol_32( y1, 11 );
|
||||
x0 = _mm256_xor_si256( x0, x2 );
|
||||
x1 = _mm256_xor_si256( x1, x3 );
|
||||
x2 = _mm256_shuffle_epi32( x2, 0xb1 );
|
||||
x3 = _mm256_shuffle_epi32( x3, 0xb1 );
|
||||
x2 = mm256_swap32_64( x2 );
|
||||
x3 = mm256_swap32_64( x3 );
|
||||
}
|
||||
|
||||
_mm256_store_si256( (__m256i*)sp->x, x0 );
|
||||
@@ -128,48 +125,58 @@ static void transform( cubehashParam *sp )
|
||||
#endif
|
||||
} // transform
|
||||
|
||||
// Cubehash context initializing is very expensive.
|
||||
// Cache the intial value for faster reinitializing.
|
||||
cubehashParam cube_ctx_cache __attribute__ ((aligned (64)));
|
||||
|
||||
int cubehashReinit( cubehashParam *sp )
|
||||
/*
|
||||
// The result of hashing 10 rounds of initial data which is params and
|
||||
// mostly zeros.
|
||||
static const uint64_t IV256[] =
|
||||
{
|
||||
memcpy( sp, &cube_ctx_cache, sizeof(cubehashParam) );
|
||||
return SUCCESS;
|
||||
0xCCD6F29FEA2BD4B4, 0x35481EAE63117E71, 0xE5D94E6322512D5B, 0xF4CC12BE7E624131,
|
||||
0x42AF2070C2D0B696, 0x3361DA8CD0720C35, 0x8EF8AD8328CCECA4, 0x40E5FBAB4680AC00,
|
||||
0x6107FBD5D89041C3, 0xF0B266796C859D41, 0x5FA2560309392549, 0x93CB628565C892FD,
|
||||
0x9E4B4E602AF2B5AE, 0x85254725774ABFDD, 0x4AB6AAD615815AEB, 0xD6032C0A9CDAF8AF
|
||||
};
|
||||
|
||||
}
|
||||
static const uint64_t IV512[] =
|
||||
{
|
||||
0x50F494D42AEA2A61, 0x4167D83E2D538B8B, 0xC701CF8C3FEE2313, 0x50AC5695CC39968E,
|
||||
0xA647A8B34D42C787, 0x825B453797CF0BEF, 0xF22090C4EEF864D2, 0xA23911AED0E5CD33,
|
||||
0x148FE485FCD398D9, 0xB64445321B017BEF, 0x2FF5781C6A536159, 0x0DBADEA991FA7934,
|
||||
0xA5A70E75D65C8A2B, 0xBC796576B1C62456, 0xE7989AF11921C8F7, 0xD43E3B447795D246
|
||||
};
|
||||
*/
|
||||
|
||||
// Initialize the cache then copy to sp.
|
||||
int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes)
|
||||
{
|
||||
int i;
|
||||
__m128i *x = (__m128i*)sp->x;
|
||||
sp->hashlen = hashbitlen/128;
|
||||
sp->blocksize = blockbytes/16;
|
||||
sp->rounds = rounds;
|
||||
sp->pos = 0;
|
||||
|
||||
if ( hashbitlen < 8 ) return BAD_HASHBITLEN;
|
||||
if ( hashbitlen > 512 ) return BAD_HASHBITLEN;
|
||||
if ( hashbitlen != 8 * (hashbitlen / 8) ) return BAD_HASHBITLEN;
|
||||
if ( hashbitlen == 512 )
|
||||
{
|
||||
|
||||
/* Sanity checks */
|
||||
if ( rounds <= 0 || rounds > 32 )
|
||||
rounds = CUBEHASH_ROUNDS;
|
||||
if ( blockbytes <= 0 || blockbytes >= 256)
|
||||
blockbytes = CUBEHASH_BLOCKBYTES;
|
||||
x[0] = m128_const_64( 0x4167D83E2D538B8B, 0x50F494D42AEA2A61 );
|
||||
x[1] = m128_const_64( 0x50AC5695CC39968E, 0xC701CF8C3FEE2313 );
|
||||
x[2] = m128_const_64( 0x825B453797CF0BEF, 0xA647A8B34D42C787 );
|
||||
x[3] = m128_const_64( 0xA23911AED0E5CD33, 0xF22090C4EEF864D2 );
|
||||
x[4] = m128_const_64( 0xB64445321B017BEF, 0x148FE485FCD398D9 );
|
||||
x[5] = m128_const_64( 0x0DBADEA991FA7934, 0x2FF5781C6A536159 );
|
||||
x[6] = m128_const_64( 0xBC796576B1C62456, 0xA5A70E75D65C8A2B );
|
||||
x[7] = m128_const_64( 0xD43E3B447795D246, 0xE7989AF11921C8F7 );
|
||||
}
|
||||
else
|
||||
{
|
||||
x[0] = m128_const_64( 0x35481EAE63117E71, 0xCCD6F29FEA2BD4B4 );
|
||||
x[1] = m128_const_64( 0xF4CC12BE7E624131, 0xE5D94E6322512D5B );
|
||||
x[2] = m128_const_64( 0x3361DA8CD0720C35, 0x42AF2070C2D0B696 );
|
||||
x[3] = m128_const_64( 0x40E5FBAB4680AC00, 0x8EF8AD8328CCECA4 );
|
||||
x[4] = m128_const_64( 0xF0B266796C859D41, 0x6107FBD5D89041C3 );
|
||||
x[5] = m128_const_64( 0x93CB628565C892FD, 0x5FA2560309392549 );
|
||||
x[6] = m128_const_64( 0x85254725774ABFDD, 0x9E4B4E602AF2B5AE );
|
||||
x[7] = m128_const_64( 0xD6032C0A9CDAF8AF, 0x4AB6AAD615815AEB );
|
||||
}
|
||||
|
||||
// all sizes of __m128i
|
||||
cube_ctx_cache.hashlen = hashbitlen/128;
|
||||
cube_ctx_cache.blocksize = blockbytes/16;
|
||||
cube_ctx_cache.rounds = rounds;
|
||||
cube_ctx_cache.pos = 0;
|
||||
|
||||
for ( i = 0; i < 8; ++i )
|
||||
cube_ctx_cache.x[i] = _mm_setzero_si128();;
|
||||
|
||||
cube_ctx_cache.x[0] = _mm_set_epi32( 0, rounds, blockbytes,
|
||||
hashbitlen / 8 );
|
||||
|
||||
for ( i = 0; i < 10; ++i )
|
||||
transform( &cube_ctx_cache );
|
||||
|
||||
memcpy( sp, &cube_ctx_cache, sizeof(cubehashParam) );
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
@@ -254,6 +261,7 @@ int cubehashUpdateDigest( cubehashParam *sp, byte *digest,
|
||||
transform( sp );
|
||||
|
||||
sp->x[7] = _mm_xor_si128( sp->x[7], _mm_set_epi32( 1,0,0,0 ) );
|
||||
|
||||
transform( sp );
|
||||
transform( sp );
|
||||
transform( sp );
|
@@ -60,336 +60,174 @@ MYALIGN const unsigned int zero[] = {0x00000000, 0x00000000, 0x00000000, 0x000
|
||||
MYALIGN const unsigned int mul2ipt[] = {0x728efc00, 0x6894e61a, 0x3fc3b14d, 0x25d9ab57, 0xfd5ba600, 0x2a8c71d7, 0x1eb845e3, 0xc96f9234};
|
||||
|
||||
|
||||
//#include "crypto_hash.h"
|
||||
|
||||
int crypto_hash(
|
||||
unsigned char *out,
|
||||
const unsigned char *in,
|
||||
unsigned long long inlen
|
||||
)
|
||||
{
|
||||
|
||||
if(hash_echo(512, in, inlen * 8, out) == SUCCESS)
|
||||
return 0;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
int main()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
#if 0
|
||||
void DumpState(__m128i *ps)
|
||||
{
|
||||
int i, j, k;
|
||||
unsigned int ucol;
|
||||
|
||||
for(j = 0; j < 4; j++)
|
||||
{
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
printf("row %d,col %d : ", i, j);
|
||||
for(k = 0; k < 4; k++)
|
||||
{
|
||||
ucol = *((int*)ps + 16 * i + 4 * j + k);
|
||||
printf("%02x%02x%02x%02x ", (ucol >> 0) & 0xff, (ucol >> 8) & 0xff, (ucol >> 16) & 0xff, (ucol >> 24) & 0xff);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#define ECHO_SUBBYTES(state, i, j) \
|
||||
state[i][j] = _mm_aesenc_si128(state[i][j], k1);\
|
||||
state[i][j] = _mm_aesenc_si128(state[i][j], M128(zero));\
|
||||
k1 = _mm_add_epi32(k1, M128(const1))
|
||||
#else
|
||||
#define ECHO_SUBBYTES(state, i, j) \
|
||||
AES_ROUND_VPERM(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
|
||||
state[i][j] = _mm_xor_si128(state[i][j], k1);\
|
||||
AES_ROUND_VPERM(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
|
||||
k1 = _mm_add_epi32(k1, M128(const1))
|
||||
|
||||
#define ECHO_SUB_AND_MIX(state, i, j, state2, c, r1, r2, r3, r4) \
|
||||
AES_ROUND_VPERM_CORE(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
|
||||
ktemp = k1;\
|
||||
TRANSFORM(ktemp, _k_ipt, t1, t4);\
|
||||
state[i][j] = _mm_xor_si128(state[i][j], ktemp);\
|
||||
AES_ROUND_VPERM_CORE(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
|
||||
k1 = _mm_add_epi32(k1, M128(const1));\
|
||||
s1 = state[i][j];\
|
||||
s2 = s1;\
|
||||
TRANSFORM(s2, mul2ipt, t1, t2);\
|
||||
s3 = _mm_xor_si128(s1, s2);\
|
||||
state2[r1][c] = _mm_xor_si128(state2[r1][c], s2);\
|
||||
state2[r2][c] = _mm_xor_si128(state2[r2][c], s1);\
|
||||
state2[r3][c] = _mm_xor_si128(state2[r3][c], s1);\
|
||||
state2[r4][c] = _mm_xor_si128(state2[r4][c], s3)
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
state[i][j] = _mm_aesenc_si128(state[i][j], k1);\
|
||||
state[i][j] = _mm_aesenc_si128(state[i][j], M128(zero));\
|
||||
k1 = _mm_add_epi32(k1, M128(const1))
|
||||
|
||||
#define ECHO_MIXBYTES(state1, state2, j, t1, t2, s2) \
|
||||
s2 = _mm_add_epi8(state1[0][j], state1[0][j]);\
|
||||
t1 = _mm_srli_epi16(state1[0][j], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = s2;\
|
||||
state2[1][j] = state1[0][j];\
|
||||
state2[2][j] = state1[0][j];\
|
||||
state2[3][j] = _mm_xor_si128(s2, state1[0][j]);\
|
||||
s2 = _mm_add_epi8(state1[1][(j + 1) & 3], state1[1][(j + 1) & 3]);\
|
||||
t1 = _mm_srli_epi16(state1[1][(j + 1) & 3], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = _mm_xor_si128(state2[0][j], _mm_xor_si128(s2, state1[1][(j + 1) & 3]));\
|
||||
state2[1][j] = _mm_xor_si128(state2[1][j], s2);\
|
||||
state2[2][j] = _mm_xor_si128(state2[2][j], state1[1][(j + 1) & 3]);\
|
||||
state2[3][j] = _mm_xor_si128(state2[3][j], state1[1][(j + 1) & 3]);\
|
||||
s2 = _mm_add_epi8(state1[2][(j + 2) & 3], state1[2][(j + 2) & 3]);\
|
||||
t1 = _mm_srli_epi16(state1[2][(j + 2) & 3], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = _mm_xor_si128(state2[0][j], state1[2][(j + 2) & 3]);\
|
||||
state2[1][j] = _mm_xor_si128(state2[1][j], _mm_xor_si128(s2, state1[2][(j + 2) & 3]));\
|
||||
state2[2][j] = _mm_xor_si128(state2[2][j], s2);\
|
||||
state2[3][j] = _mm_xor_si128(state2[3][j], state1[2][(j + 2) & 3]);\
|
||||
s2 = _mm_add_epi8(state1[3][(j + 3) & 3], state1[3][(j + 3) & 3]);\
|
||||
t1 = _mm_srli_epi16(state1[3][(j + 3) & 3], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = _mm_xor_si128(state2[0][j], state1[3][(j + 3) & 3]);\
|
||||
state2[1][j] = _mm_xor_si128(state2[1][j], state1[3][(j + 3) & 3]);\
|
||||
state2[2][j] = _mm_xor_si128(state2[2][j], _mm_xor_si128(s2, state1[3][(j + 3) & 3]));\
|
||||
state2[3][j] = _mm_xor_si128(state2[3][j], s2)
|
||||
s2 = _mm_add_epi8(state1[0][j], state1[0][j]);\
|
||||
t1 = _mm_srli_epi16(state1[0][j], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = s2;\
|
||||
state2[1][j] = state1[0][j];\
|
||||
state2[2][j] = state1[0][j];\
|
||||
state2[3][j] = _mm_xor_si128(s2, state1[0][j]);\
|
||||
s2 = _mm_add_epi8(state1[1][(j + 1) & 3], state1[1][(j + 1) & 3]);\
|
||||
t1 = _mm_srli_epi16(state1[1][(j + 1) & 3], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = _mm_xor_si128(state2[0][j], _mm_xor_si128(s2, state1[1][(j + 1) & 3]));\
|
||||
state2[1][j] = _mm_xor_si128(state2[1][j], s2);\
|
||||
state2[2][j] = _mm_xor_si128(state2[2][j], state1[1][(j + 1) & 3]);\
|
||||
state2[3][j] = _mm_xor_si128(state2[3][j], state1[1][(j + 1) & 3]);\
|
||||
s2 = _mm_add_epi8(state1[2][(j + 2) & 3], state1[2][(j + 2) & 3]);\
|
||||
t1 = _mm_srli_epi16(state1[2][(j + 2) & 3], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = _mm_xor_si128(state2[0][j], state1[2][(j + 2) & 3]);\
|
||||
state2[1][j] = _mm_xor_si128(state2[1][j], _mm_xor_si128(s2, state1[2][(j + 2) & 3]));\
|
||||
state2[2][j] = _mm_xor_si128(state2[2][j], s2);\
|
||||
state2[3][j] = _mm_xor_si128(state2[3][j], state1[2][(j + 2) & 3]);\
|
||||
s2 = _mm_add_epi8(state1[3][(j + 3) & 3], state1[3][(j + 3) & 3]);\
|
||||
t1 = _mm_srli_epi16(state1[3][(j + 3) & 3], 7);\
|
||||
t1 = _mm_and_si128(t1, M128(lsbmask));\
|
||||
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
|
||||
s2 = _mm_xor_si128(s2, t2);\
|
||||
state2[0][j] = _mm_xor_si128(state2[0][j], state1[3][(j + 3) & 3]);\
|
||||
state2[1][j] = _mm_xor_si128(state2[1][j], state1[3][(j + 3) & 3]);\
|
||||
state2[2][j] = _mm_xor_si128(state2[2][j], _mm_xor_si128(s2, state1[3][(j + 3) & 3]));\
|
||||
state2[3][j] = _mm_xor_si128(state2[3][j], s2)
|
||||
|
||||
|
||||
#define ECHO_ROUND_UNROLL2 \
|
||||
ECHO_SUBBYTES(_state, 0, 0);\
|
||||
ECHO_SUBBYTES(_state, 1, 0);\
|
||||
ECHO_SUBBYTES(_state, 2, 0);\
|
||||
ECHO_SUBBYTES(_state, 3, 0);\
|
||||
ECHO_SUBBYTES(_state, 0, 1);\
|
||||
ECHO_SUBBYTES(_state, 1, 1);\
|
||||
ECHO_SUBBYTES(_state, 2, 1);\
|
||||
ECHO_SUBBYTES(_state, 3, 1);\
|
||||
ECHO_SUBBYTES(_state, 0, 2);\
|
||||
ECHO_SUBBYTES(_state, 1, 2);\
|
||||
ECHO_SUBBYTES(_state, 2, 2);\
|
||||
ECHO_SUBBYTES(_state, 3, 2);\
|
||||
ECHO_SUBBYTES(_state, 0, 3);\
|
||||
ECHO_SUBBYTES(_state, 1, 3);\
|
||||
ECHO_SUBBYTES(_state, 2, 3);\
|
||||
ECHO_SUBBYTES(_state, 3, 3);\
|
||||
ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 0);\
|
||||
ECHO_SUBBYTES(_state2, 1, 0);\
|
||||
ECHO_SUBBYTES(_state2, 2, 0);\
|
||||
ECHO_SUBBYTES(_state2, 3, 0);\
|
||||
ECHO_SUBBYTES(_state2, 0, 1);\
|
||||
ECHO_SUBBYTES(_state2, 1, 1);\
|
||||
ECHO_SUBBYTES(_state2, 2, 1);\
|
||||
ECHO_SUBBYTES(_state2, 3, 1);\
|
||||
ECHO_SUBBYTES(_state2, 0, 2);\
|
||||
ECHO_SUBBYTES(_state2, 1, 2);\
|
||||
ECHO_SUBBYTES(_state2, 2, 2);\
|
||||
ECHO_SUBBYTES(_state2, 3, 2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 3);\
|
||||
ECHO_SUBBYTES(_state2, 1, 3);\
|
||||
ECHO_SUBBYTES(_state2, 2, 3);\
|
||||
ECHO_SUBBYTES(_state2, 3, 3);\
|
||||
ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2)
|
||||
ECHO_SUBBYTES(_state, 0, 0);\
|
||||
ECHO_SUBBYTES(_state, 1, 0);\
|
||||
ECHO_SUBBYTES(_state, 2, 0);\
|
||||
ECHO_SUBBYTES(_state, 3, 0);\
|
||||
ECHO_SUBBYTES(_state, 0, 1);\
|
||||
ECHO_SUBBYTES(_state, 1, 1);\
|
||||
ECHO_SUBBYTES(_state, 2, 1);\
|
||||
ECHO_SUBBYTES(_state, 3, 1);\
|
||||
ECHO_SUBBYTES(_state, 0, 2);\
|
||||
ECHO_SUBBYTES(_state, 1, 2);\
|
||||
ECHO_SUBBYTES(_state, 2, 2);\
|
||||
ECHO_SUBBYTES(_state, 3, 2);\
|
||||
ECHO_SUBBYTES(_state, 0, 3);\
|
||||
ECHO_SUBBYTES(_state, 1, 3);\
|
||||
ECHO_SUBBYTES(_state, 2, 3);\
|
||||
ECHO_SUBBYTES(_state, 3, 3);\
|
||||
ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 0);\
|
||||
ECHO_SUBBYTES(_state2, 1, 0);\
|
||||
ECHO_SUBBYTES(_state2, 2, 0);\
|
||||
ECHO_SUBBYTES(_state2, 3, 0);\
|
||||
ECHO_SUBBYTES(_state2, 0, 1);\
|
||||
ECHO_SUBBYTES(_state2, 1, 1);\
|
||||
ECHO_SUBBYTES(_state2, 2, 1);\
|
||||
ECHO_SUBBYTES(_state2, 3, 1);\
|
||||
ECHO_SUBBYTES(_state2, 0, 2);\
|
||||
ECHO_SUBBYTES(_state2, 1, 2);\
|
||||
ECHO_SUBBYTES(_state2, 2, 2);\
|
||||
ECHO_SUBBYTES(_state2, 3, 2);\
|
||||
ECHO_SUBBYTES(_state2, 0, 3);\
|
||||
ECHO_SUBBYTES(_state2, 1, 3);\
|
||||
ECHO_SUBBYTES(_state2, 2, 3);\
|
||||
ECHO_SUBBYTES(_state2, 3, 3);\
|
||||
ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\
|
||||
ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2)
|
||||
|
||||
|
||||
|
||||
#define SAVESTATE(dst, src)\
|
||||
dst[0][0] = src[0][0];\
|
||||
dst[0][1] = src[0][1];\
|
||||
dst[0][2] = src[0][2];\
|
||||
dst[0][3] = src[0][3];\
|
||||
dst[1][0] = src[1][0];\
|
||||
dst[1][1] = src[1][1];\
|
||||
dst[1][2] = src[1][2];\
|
||||
dst[1][3] = src[1][3];\
|
||||
dst[2][0] = src[2][0];\
|
||||
dst[2][1] = src[2][1];\
|
||||
dst[2][2] = src[2][2];\
|
||||
dst[2][3] = src[2][3];\
|
||||
dst[3][0] = src[3][0];\
|
||||
dst[3][1] = src[3][1];\
|
||||
dst[3][2] = src[3][2];\
|
||||
dst[3][3] = src[3][3]
|
||||
dst[0][0] = src[0][0];\
|
||||
dst[0][1] = src[0][1];\
|
||||
dst[0][2] = src[0][2];\
|
||||
dst[0][3] = src[0][3];\
|
||||
dst[1][0] = src[1][0];\
|
||||
dst[1][1] = src[1][1];\
|
||||
dst[1][2] = src[1][2];\
|
||||
dst[1][3] = src[1][3];\
|
||||
dst[2][0] = src[2][0];\
|
||||
dst[2][1] = src[2][1];\
|
||||
dst[2][2] = src[2][2];\
|
||||
dst[2][3] = src[2][3];\
|
||||
dst[3][0] = src[3][0];\
|
||||
dst[3][1] = src[3][1];\
|
||||
dst[3][2] = src[3][2];\
|
||||
dst[3][3] = src[3][3]
|
||||
|
||||
|
||||
void Compress(hashState_echo *ctx, const unsigned char *pmsg, unsigned int uBlockCount)
|
||||
{
|
||||
unsigned int r, b, i, j;
|
||||
// __m128i t1, t2, t3, t4, s1, s2, s3, k1, ktemp;
|
||||
__m128i t1, t2, s2, k1;
|
||||
__m128i _state[4][4], _state2[4][4], _statebackup[4][4];
|
||||
unsigned int r, b, i, j;
|
||||
__m128i t1, t2, s2, k1;
|
||||
__m128i _state[4][4], _state2[4][4], _statebackup[4][4];
|
||||
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < ctx->uHashSize / 256; j++)
|
||||
_state[i][j] = ctx->state[i][j];
|
||||
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < ctx->uHashSize / 256; j++)
|
||||
_state[i][j] = ctx->state[i][j];
|
||||
for(b = 0; b < uBlockCount; b++)
|
||||
{
|
||||
ctx->k = _mm_add_epi64(ctx->k, ctx->const1536);
|
||||
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
// transform cv
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < ctx->uHashSize / 256; j++)
|
||||
{
|
||||
TRANSFORM(_state[i][j], _k_ipt, t1, t2);
|
||||
}
|
||||
#endif
|
||||
|
||||
for(b = 0; b < uBlockCount; b++)
|
||||
// load message
|
||||
for(j = ctx->uHashSize / 256; j < 4; j++)
|
||||
{
|
||||
ctx->k = _mm_add_epi64(ctx->k, ctx->const1536);
|
||||
|
||||
// load message
|
||||
for(j = ctx->uHashSize / 256; j < 4; j++)
|
||||
{
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
_state[i][j] = _mm_loadu_si128((__m128i*)pmsg + 4 * (j - (ctx->uHashSize / 256)) + i);
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
// transform message
|
||||
TRANSFORM(_state[i][j], _k_ipt, t1, t2);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// save state
|
||||
SAVESTATE(_statebackup, _state);
|
||||
|
||||
|
||||
k1 = ctx->k;
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
for(r = 0; r < ctx->uRounds / 2; r++)
|
||||
{
|
||||
ECHO_ROUND_UNROLL2;
|
||||
}
|
||||
|
||||
#else
|
||||
for(r = 0; r < ctx->uRounds / 2; r++)
|
||||
{
|
||||
_state2[0][0] = M128(zero); _state2[1][0] = M128(zero); _state2[2][0] = M128(zero); _state2[3][0] = M128(zero);
|
||||
_state2[0][1] = M128(zero); _state2[1][1] = M128(zero); _state2[2][1] = M128(zero); _state2[3][1] = M128(zero);
|
||||
_state2[0][2] = M128(zero); _state2[1][2] = M128(zero); _state2[2][2] = M128(zero); _state2[3][2] = M128(zero);
|
||||
_state2[0][3] = M128(zero); _state2[1][3] = M128(zero); _state2[2][3] = M128(zero); _state2[3][3] = M128(zero);
|
||||
|
||||
ECHO_SUB_AND_MIX(_state, 0, 0, _state2, 0, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state, 1, 0, _state2, 3, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state, 2, 0, _state2, 2, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state, 3, 0, _state2, 1, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state, 0, 1, _state2, 1, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state, 1, 1, _state2, 0, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state, 2, 1, _state2, 3, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state, 3, 1, _state2, 2, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state, 0, 2, _state2, 2, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state, 1, 2, _state2, 1, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state, 2, 2, _state2, 0, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state, 3, 2, _state2, 3, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state, 0, 3, _state2, 3, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state, 1, 3, _state2, 2, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state, 2, 3, _state2, 1, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state, 3, 3, _state2, 0, 3, 0, 1, 2);
|
||||
|
||||
_state[0][0] = M128(zero); _state[1][0] = M128(zero); _state[2][0] = M128(zero); _state[3][0] = M128(zero);
|
||||
_state[0][1] = M128(zero); _state[1][1] = M128(zero); _state[2][1] = M128(zero); _state[3][1] = M128(zero);
|
||||
_state[0][2] = M128(zero); _state[1][2] = M128(zero); _state[2][2] = M128(zero); _state[3][2] = M128(zero);
|
||||
_state[0][3] = M128(zero); _state[1][3] = M128(zero); _state[2][3] = M128(zero); _state[3][3] = M128(zero);
|
||||
|
||||
ECHO_SUB_AND_MIX(_state2, 0, 0, _state, 0, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state2, 1, 0, _state, 3, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state2, 2, 0, _state, 2, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state2, 3, 0, _state, 1, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state2, 0, 1, _state, 1, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state2, 1, 1, _state, 0, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state2, 2, 1, _state, 3, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state2, 3, 1, _state, 2, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state2, 0, 2, _state, 2, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state2, 1, 2, _state, 1, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state2, 2, 2, _state, 0, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state2, 3, 2, _state, 3, 3, 0, 1, 2);
|
||||
ECHO_SUB_AND_MIX(_state2, 0, 3, _state, 3, 0, 1, 2, 3);
|
||||
ECHO_SUB_AND_MIX(_state2, 1, 3, _state, 2, 1, 2, 3, 0);
|
||||
ECHO_SUB_AND_MIX(_state2, 2, 3, _state, 1, 2, 3, 0, 1);
|
||||
ECHO_SUB_AND_MIX(_state2, 3, 3, _state, 0, 3, 0, 1, 2);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
if(ctx->uHashSize == 256)
|
||||
{
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][1]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][3]);
|
||||
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][1]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][3]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
|
||||
_state[i][1] = _mm_xor_si128(_state[i][1], _state[i][3]);
|
||||
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);
|
||||
|
||||
_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][1]);
|
||||
_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][3]);
|
||||
}
|
||||
}
|
||||
|
||||
pmsg += ctx->uBlockLength;
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
_state[i][j] = _mm_loadu_si128((__m128i*)pmsg + 4 * (j - (ctx->uHashSize / 256)) + i);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
// transform state
|
||||
for(i = 0; i < 4; i++)
|
||||
for(j = 0; j < 4; j++)
|
||||
{
|
||||
TRANSFORM(_state[i][j], _k_opt, t1, t2);
|
||||
}
|
||||
#endif
|
||||
// save state
|
||||
SAVESTATE(_statebackup, _state);
|
||||
|
||||
SAVESTATE(ctx->state, _state);
|
||||
k1 = ctx->k;
|
||||
|
||||
for(r = 0; r < ctx->uRounds / 2; r++)
|
||||
{
|
||||
ECHO_ROUND_UNROLL2;
|
||||
}
|
||||
|
||||
if(ctx->uHashSize == 256)
|
||||
{
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][1]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][3]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][1]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][3]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
|
||||
_state[i][1] = _mm_xor_si128(_state[i][1], _state[i][3]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
|
||||
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);
|
||||
_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][1]);
|
||||
_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][3]);
|
||||
}
|
||||
}
|
||||
pmsg += ctx->uBlockLength;
|
||||
}
|
||||
SAVESTATE(ctx->state, _state);
|
||||
|
||||
}
|
||||
|
||||
|
@@ -30,6 +30,7 @@
|
||||
typedef struct
|
||||
{
|
||||
__m128i state[4][4];
|
||||
BitSequence buffer[192];
|
||||
__m128i k;
|
||||
__m128i hashsize;
|
||||
__m128i const1536;
|
||||
@@ -39,9 +40,8 @@ typedef struct
|
||||
unsigned int uBlockLength;
|
||||
unsigned int uBufferBytes;
|
||||
DataLength processed_bits;
|
||||
BitSequence buffer[192];
|
||||
|
||||
} hashState_echo;
|
||||
} hashState_echo __attribute__ ((aligned (64)));
|
||||
|
||||
HashReturn init_echo(hashState_echo *state, int hashbitlen);
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,320 +0,0 @@
|
||||
/* $Id: sph_echo.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* ECHO interface. ECHO is a family of functions which differ by
|
||||
* their output size; this implementation defines ECHO for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_echo.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_ECHO_H__
|
||||
#define SPH_ECHO_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-224.
|
||||
*/
|
||||
#define SPH_SIZE_echo224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-256.
|
||||
*/
|
||||
#define SPH_SIZE_echo256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-384.
|
||||
*/
|
||||
#define SPH_SIZE_echo384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-512.
|
||||
*/
|
||||
#define SPH_SIZE_echo512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an ECHO computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for ECHO-224
|
||||
* and ECHO-256.
|
||||
*
|
||||
* The contents of this structure are private. A running ECHO computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[192]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
union {
|
||||
sph_u32 Vs[4][4];
|
||||
#if SPH_64
|
||||
sph_u64 Vb[4][2];
|
||||
#endif
|
||||
} u;
|
||||
sph_u32 C0, C1, C2, C3;
|
||||
#endif
|
||||
} sph_echo_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an ECHO computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for ECHO-384
|
||||
* and ECHO-512.
|
||||
*
|
||||
* The contents of this structure are private. A running ECHO computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
union {
|
||||
sph_u32 Vs[8][4];
|
||||
#if SPH_64
|
||||
sph_u64 Vb[8][2];
|
||||
#endif
|
||||
} u;
|
||||
sph_u32 C0, C1, C2, C3;
|
||||
#endif
|
||||
} sph_echo_big_context;
|
||||
|
||||
/**
|
||||
* Type for a ECHO-224 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_echo_small_context sph_echo224_context;
|
||||
|
||||
/**
|
||||
* Type for a ECHO-256 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_echo_small_context sph_echo256_context;
|
||||
|
||||
/**
|
||||
* Type for a ECHO-384 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_echo_big_context sph_echo384_context;
|
||||
|
||||
/**
|
||||
* Type for a ECHO-512 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_echo_big_context sph_echo512_context;
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-224 context (pointer to a
|
||||
* <code>sph_echo224_context</code>)
|
||||
*/
|
||||
void sph_echo224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accomodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-256 context (pointer to a
|
||||
* <code>sph_echo256_context</code>)
|
||||
*/
|
||||
void sph_echo256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accomodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-384 context (pointer to a
|
||||
* <code>sph_echo384_context</code>)
|
||||
*/
|
||||
void sph_echo384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accomodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-512 context (pointer to a
|
||||
* <code>sph_echo512_context</code>)
|
||||
*/
|
||||
void sph_echo512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accomodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -11,6 +11,8 @@ extern "C"{
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
#define SPH_FUGUE_NOCOPY 1
|
||||
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0xf4c9120d), SPH_C32(0x6286f757), SPH_C32(0xee39e01c),
|
||||
SPH_C32(0xe074e3cb), SPH_C32(0xa1127c62), SPH_C32(0x9a43d215),
|
||||
|
@@ -43,7 +43,7 @@
|
||||
# if !defined( __MINGW32__ ) && !defined( _AIX )
|
||||
# include <endian.h>
|
||||
# if !defined( __BEOS__ )
|
||||
# include <byteswap.h>
|
||||
//# include <byteswap.h>
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
@@ -12,7 +12,7 @@
|
||||
#include <memory.h>
|
||||
#include "hash-groestl.h"
|
||||
#include "miner.h"
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
|
||||
|
@@ -9,7 +9,7 @@
|
||||
#include <memory.h>
|
||||
#include "hash-groestl256.h"
|
||||
#include "miner.h"
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
|
||||
|
@@ -56,14 +56,15 @@ void groestlhash( void *output, const void *input )
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_groestl( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_groestl( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
@@ -54,8 +54,8 @@ void myriad_hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_myriad(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_myriad( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -63,6 +63,7 @@ int scanhash_myriad(int thr_id, struct work *work,
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
@@ -33,7 +33,7 @@ void myriad_4way_hash( void *output, const void *input )
|
||||
myrgr_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &myrgr_4way_ctx, sizeof(myrgr_4way_ctx) );
|
||||
|
||||
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, input, 640 );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, input, 640 );
|
||||
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 640 );
|
||||
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
@@ -43,66 +43,52 @@ void myriad_4way_hash( void *output, const void *input )
|
||||
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 640 );
|
||||
|
||||
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
sha256_4way( &ctx.sha, vhash, 64 );
|
||||
sha256_4way_close( &ctx.sha, vhash );
|
||||
|
||||
mm_deinterleave_4x32( output, output+32, output+64, output+96,
|
||||
vhash, 256 );
|
||||
sha256_4way_close( &ctx.sha, output );
|
||||
}
|
||||
|
||||
int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_myriad_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
/*
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
*/
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
||||
|
||||
myriad_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash7[ lane ] <= Htarg )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -12,8 +12,8 @@
|
||||
|
||||
void myriad_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_myriad_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_myrgr_4way_ctx();
|
||||
|
||||
@@ -21,8 +21,8 @@ void init_myrgr_4way_ctx();
|
||||
|
||||
void myriad_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_myriad( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_myrgr_ctx();
|
||||
|
||||
|
@@ -531,16 +531,17 @@ static const sph_u32 T512[64][16] = {
|
||||
|
||||
#define INPUT_BIG \
|
||||
do { \
|
||||
const __m256i zero = _mm256_setzero_si256(); \
|
||||
__m256i db = *buf; \
|
||||
const sph_u32 *tp = &T512[0][0]; \
|
||||
m0 = m256_zero; \
|
||||
m1 = m256_zero; \
|
||||
m2 = m256_zero; \
|
||||
m3 = m256_zero; \
|
||||
m4 = m256_zero; \
|
||||
m5 = m256_zero; \
|
||||
m6 = m256_zero; \
|
||||
m7 = m256_zero; \
|
||||
m0 = zero; \
|
||||
m1 = zero; \
|
||||
m2 = zero; \
|
||||
m3 = zero; \
|
||||
m4 = zero; \
|
||||
m5 = zero; \
|
||||
m6 = zero; \
|
||||
m7 = zero; \
|
||||
for ( int u = 0; u < 64; u++ ) \
|
||||
{ \
|
||||
__m256i dm = _mm256_and_si256( db, m256_one_64 ) ; \
|
||||
@@ -913,9 +914,7 @@ void hamsi512_4way( hamsi_4way_big_context *sc, const void *data, size_t len )
|
||||
|
||||
void hamsi512_4way_close( hamsi_4way_big_context *sc, void *dst )
|
||||
{
|
||||
__m256i *out = (__m256i*)dst;
|
||||
__m256i pad[1];
|
||||
size_t u;
|
||||
int ch, cl;
|
||||
|
||||
sph_enc32be( &ch, sc->count_high );
|
||||
@@ -925,8 +924,8 @@ void hamsi512_4way_close( hamsi_4way_big_context *sc, void *dst )
|
||||
0UL, 0x80UL, 0UL, 0x80UL );
|
||||
hamsi_big( sc, sc->buf, 1 );
|
||||
hamsi_big_final( sc, pad );
|
||||
for ( u = 0; u < 8; u ++ )
|
||||
out[u] = mm256_bswap_32( sc->h[u] );
|
||||
|
||||
mm256_block_bswap_32( (__m256i*)dst, sc->h );
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@@ -40,7 +40,7 @@
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
|
@@ -83,7 +83,7 @@ extern "C"{
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_and_si128( x1, x2 ), \
|
||||
_mm_or_si128( x4, x6 ) ), x5 ) ), \
|
||||
_mm_and_si128( x4, \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_and_si128( mm_not(x2), x5 ), \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_and_si128( mm128_not(x2), x5 ), \
|
||||
_mm_xor_si128( x1, x6 ) ), x0 ) ) ), \
|
||||
_mm_xor_si128( _mm_and_si128( x2, x6 ), x0 ) )
|
||||
|
||||
@@ -91,7 +91,7 @@ extern "C"{
|
||||
#define F5(x6, x5, x4, x3, x2, x1, x0) \
|
||||
_mm_xor_si128( \
|
||||
_mm_and_si128( x0, \
|
||||
mm_not( _mm_xor_si128( \
|
||||
mm128_not( _mm_xor_si128( \
|
||||
_mm_and_si128( _mm_and_si128( x1, x2 ), x3 ), x5 ) ) ), \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_and_si128( x1, x4 ), \
|
||||
_mm_and_si128( x2, x5 ) ), \
|
||||
@@ -136,8 +136,8 @@ extern "C"{
|
||||
#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c) \
|
||||
do { \
|
||||
__m128i t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); \
|
||||
x7 = _mm_add_epi32( _mm_add_epi32( mm_ror_32( t, 7 ), \
|
||||
mm_ror_32( x7, 11 ) ), \
|
||||
x7 = _mm_add_epi32( _mm_add_epi32( mm128_ror_32( t, 7 ), \
|
||||
mm128_ror_32( x7, 11 ) ), \
|
||||
_mm_add_epi32( w, _mm_set1_epi32( c ) ) ); \
|
||||
} while (0)
|
||||
|
||||
|
@@ -69,7 +69,7 @@ extern "C"{
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#define SPH_SIZE_haval256_5 256
|
||||
|
||||
|
@@ -131,12 +131,14 @@ void bastionhash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_bastion(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_bastion( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t _ALIGN(64) hash32[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
@@ -79,11 +79,12 @@ extern void heavyhash(unsigned char* output, const unsigned char* input, int len
|
||||
|
||||
}
|
||||
|
||||
int scanhash_heavy(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_heavy( uint32_t *pdata, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[8];
|
||||
uint32_t start_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
do {
|
||||
heavyhash((unsigned char *)hash, (unsigned char *)pdata, 80);
|
||||
|
@@ -3,7 +3,7 @@
|
||||
#include "wolf-aes.h"
|
||||
#include "miner.h"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#if defined(__AES__)
|
||||
|
||||
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
|
||||
{
|
||||
@@ -83,7 +83,7 @@ void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf)
|
||||
keys[14] = tmp1;
|
||||
}
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
#if defined(__SSE4_2__)
|
||||
//#ifdef __AVX__
|
||||
|
||||
#define AESENC(i,j) \
|
||||
|
@@ -101,39 +101,6 @@ void hodl_build_block_header( struct work* g_work, uint32_t version,
|
||||
g_work->data[31] = 0x00000280;
|
||||
}
|
||||
|
||||
// hodl build_extra_header is redundant, hodl can use std_build_extra_header
|
||||
// and call hodl_build_block_header.
|
||||
#if 0
|
||||
void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
|
||||
{
|
||||
uchar merkle_tree[64] = { 0 };
|
||||
size_t t;
|
||||
// int i;
|
||||
|
||||
algo_gate.gen_merkle_root( merkle_tree, sctx );
|
||||
// Increment extranonce2
|
||||
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
|
||||
algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
|
||||
(uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
|
||||
le32dec( sctx->job.ntime ), le32dec( sctx->job.nbits ) );
|
||||
/*
|
||||
// Assemble block header
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = le32dec( sctx->job.version );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
|
||||
|
||||
g_work->data[ algo_gate.ntime_index ] = le32dec( sctx->job.ntime );
|
||||
g_work->data[ algo_gate.nbits_index ] = le32dec( sctx->job.nbits );
|
||||
g_work->data[22] = 0x80000000;
|
||||
g_work->data[31] = 0x00000280;
|
||||
*/
|
||||
}
|
||||
#endif
|
||||
|
||||
// called only by thread 0, saves a backup of g_work
|
||||
void hodl_get_new_work( struct work* work, struct work* g_work)
|
||||
{
|
||||
@@ -176,20 +143,20 @@ bool hodl_do_this_thread( int thr_id )
|
||||
return ( thr_id == 0 );
|
||||
}
|
||||
|
||||
int hodl_scanhash( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int hodl_scanhash( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, thr_id );
|
||||
#if defined(__AES__)
|
||||
GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, mythr->id );
|
||||
pthread_barrier_wait( &hodl_barrier );
|
||||
return scanhash_hodl_wolf( thr_id, work, max_nonce, hashes_done );
|
||||
return scanhash_hodl_wolf( work, max_nonce, hashes_done, thr_info );
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
bool register_hodl_algo( algo_gate_t* gate )
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
#if !defined(__AES__)
|
||||
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
|
||||
return false;
|
||||
#endif
|
||||
@@ -199,7 +166,7 @@ bool register_hodl_algo( algo_gate_t* gate )
|
||||
// return false;
|
||||
// }
|
||||
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
|
||||
gate->optimizations = AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->optimizations = AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&hodl_scanhash;
|
||||
gate->get_new_work = (void*)&hodl_get_new_work;
|
||||
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;
|
||||
@@ -207,7 +174,6 @@ bool register_hodl_algo( algo_gate_t* gate )
|
||||
gate->build_stratum_request = (void*)&hodl_le_build_stratum_request;
|
||||
gate->malloc_txs_request = (void*)&hodl_malloc_txs_request;
|
||||
gate->build_block_header = (void*)&hodl_build_block_header;
|
||||
// gate->build_extraheader = (void*)&hodl_build_extraheader;
|
||||
gate->resync_threads = (void*)&hodl_resync_threads;
|
||||
gate->do_this_thread = (void*)&hodl_do_this_thread;
|
||||
gate->work_cmp_size = 76;
|
||||
|
@@ -8,7 +8,7 @@
|
||||
#include "hodl-wolf.h"
|
||||
#include "miner.h"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#if defined(__AES__)
|
||||
|
||||
void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount,
|
||||
void *MidHash )
|
||||
@@ -17,7 +17,7 @@ void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount,
|
||||
const uint32_t StartChunk = ThreadID * Chunk;
|
||||
const uint32_t EndChunk = StartChunk + Chunk;
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
#if defined(__SSE4_2__)
|
||||
//#ifdef __AVX__
|
||||
uint64_t* TempBufs[ SHA512_PARALLEL_N ] ;
|
||||
uint64_t* desination[ SHA512_PARALLEL_N ];
|
||||
@@ -61,13 +61,14 @@ void Rev256(uint32_t *Dest, const uint32_t *Src)
|
||||
}
|
||||
*/
|
||||
|
||||
int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
#ifdef __SSE4_2__
|
||||
#if defined(__SSE4_2__)
|
||||
//#ifdef __AVX__
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int threadNumber = mythr->id;
|
||||
CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf;
|
||||
CacheEntry Cache[AES_PARALLEL_N];
|
||||
__m128i* data[AES_PARALLEL_N];
|
||||
@@ -147,6 +148,7 @@ int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf;
|
||||
CacheEntry Cache;
|
||||
uint32_t CollisionCount = 0;
|
||||
int threadNumber = mythr->id;
|
||||
|
||||
swab32_array( BlockHdr, pdata, 20 );
|
||||
// Search for pattern in psuedorandom data
|
||||
@@ -160,7 +162,6 @@ int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
{
|
||||
// copy data to first l2 cache
|
||||
memcpy(Cache.dwords, Garbage + k, GARBAGE_SLICE_SIZE);
|
||||
#ifndef NO_AES_NI
|
||||
for(int j = 0; j < AES_ITERATIONS; j++)
|
||||
{
|
||||
CacheEntry TmpXOR;
|
||||
@@ -184,7 +185,6 @@ int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
AES256CBC( Cache.dqwords, TmpXOR.dqwords, ExpKey,
|
||||
TmpXOR.dqwords[ (GARBAGE_SLICE_SIZE / sizeof(__m128i))
|
||||
- 1 ], 256 ); }
|
||||
#endif
|
||||
// use last X bits as solution
|
||||
if( ( Cache.dwords[ (GARBAGE_SLICE_SIZE >> 2) - 1 ]
|
||||
& (COMPARE_SIZE - 1) ) < 1000 )
|
||||
@@ -206,7 +206,7 @@ int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
*hashes_done = CollisionCount;
|
||||
return(0);
|
||||
|
||||
#endif
|
||||
#endif // AVX else
|
||||
|
||||
}
|
||||
|
||||
@@ -218,5 +218,5 @@ void GenRandomGarbage(CacheEntry *Garbage, uint32_t *pdata, int thr_id)
|
||||
GenerateGarbageCore(Garbage, thr_id, opt_n_threads, MidHash);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // AES
|
||||
|
||||
|
@@ -19,8 +19,8 @@ typedef union _CacheEntry
|
||||
__m128i dqwords[GARBAGE_SLICE_SIZE >> 4] __attribute__((aligned(16)));
|
||||
} CacheEntry;
|
||||
|
||||
int scanhash_hodl_wolf( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void GenRandomGarbage( CacheEntry *Garbage, uint32_t *pdata, int thr_id);
|
||||
|
||||
|
@@ -22,16 +22,22 @@ typedef struct
|
||||
#ifdef __AVX2__
|
||||
__m256i h[8];
|
||||
__m256i w[80];
|
||||
#else // AVX
|
||||
#elif defined(__SSE4_2__)
|
||||
//#elif defined(__AVX__)
|
||||
__m128i h[8];
|
||||
__m128i w[80];
|
||||
#else
|
||||
int dummy;
|
||||
#endif
|
||||
} Sha512Context;
|
||||
|
||||
#ifdef __AVX2__
|
||||
#define SHA512_PARALLEL_N 8
|
||||
#else // AVX
|
||||
#elif defined(__SSE4_2__)
|
||||
//#elif defined(__AVX__)
|
||||
#define SHA512_PARALLEL_N 4
|
||||
#else
|
||||
#define SHA512_PARALLEL_N 1 // dummy value
|
||||
#endif
|
||||
|
||||
//SHA-512 related functions
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#ifndef __AVX2__
|
||||
#ifdef __SSE4_2__
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
//#ifdef __AVX__
|
||||
|
||||
//Dependencies
|
||||
@@ -10,6 +11,10 @@
|
||||
#include <sys/endian.h>
|
||||
#endif
|
||||
|
||||
#if defined(__CYGWIN__)
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
#include "tmmintrin.h"
|
||||
#include "smmintrin.h"
|
||||
|
||||
|
@@ -8,6 +8,10 @@
|
||||
#include <sys/endian.h>
|
||||
#endif
|
||||
|
||||
#if defined(__CYGWIN__)
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
#include "tmmintrin.h"
|
||||
#include "smmintrin.h"
|
||||
#include "immintrin.h"
|
||||
|
@@ -6,7 +6,7 @@
|
||||
|
||||
void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf);
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
#if defined(__SSE4_2__)
|
||||
//#ifdef __AVX__
|
||||
|
||||
#define AES_PARALLEL_N 8
|
||||
|
@@ -44,7 +44,7 @@ extern "C"{
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#define SPH_SIZE_jh256 256
|
||||
|
||||
|
@@ -3,7 +3,6 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
//#include "avxdefs.h"
|
||||
|
||||
#if defined(JHA_4WAY)
|
||||
|
||||
@@ -13,9 +12,6 @@
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
|
||||
//static __thread keccak512_4way_context jha_kec_mid
|
||||
// __attribute__ ((aligned (64)));
|
||||
|
||||
void jha_hash_4way( void *out, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
@@ -46,7 +42,7 @@ void jha_hash_4way( void *out, const void *input )
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256(
|
||||
vh[0], _mm256_set1_epi64x( 1 ) ), m256_zero );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
@@ -59,7 +55,7 @@ void jha_hash_4way( void *out, const void *input )
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
mm256_interleave_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
skein512_4way_init( &ctx_skein );
|
||||
skein512_4way( &ctx_skein, vhash, 64 );
|
||||
@@ -77,26 +73,24 @@ void jha_hash_4way( void *out, const void *input )
|
||||
jh512_4way_close( &ctx_jh, vhashB );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
casti_m256i( out, i ) = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
}
|
||||
|
||||
mm256_deinterleave_4x64( out, out+32, out+64, out+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_jha_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *hash7 = &(hash[25]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
@@ -115,11 +109,7 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
0
|
||||
};
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
for ( int m = 0; m < 6; m++ )
|
||||
{
|
||||
@@ -127,29 +117,27 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
jha_hash_4way( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( !( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ ) if ( !( (hash7[i] & mask ) == 0 ) )
|
||||
{
|
||||
pdata[19] = n;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
extr_lane_4x64( lane_hash, hash, i, 256 );
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
@@ -12,14 +12,14 @@
|
||||
#if defined JHA_4WAY
|
||||
void jha_hash_4way( void *state, const void *input );
|
||||
|
||||
int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_jha_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void jha_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_jha( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_jha( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -81,7 +81,8 @@ void jha_hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_jha(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_jha( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
@@ -89,7 +90,8 @@ int scanhash_jha(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *ha
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19] - 1;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
|
@@ -10,60 +10,50 @@
|
||||
|
||||
void keccakhash_4way(void *state, const void *input)
|
||||
{
|
||||
uint64_t vhash[4*4] __attribute__ ((aligned (64)));
|
||||
keccak256_4way_context ctx;
|
||||
|
||||
keccak256_4way_init( &ctx );
|
||||
keccak256_4way( &ctx, input, 80 );
|
||||
keccak256_4way_close( &ctx, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
||||
keccak256_4way_close( &ctx, state );
|
||||
}
|
||||
|
||||
int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[25]); // 3*8+1
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
// const uint32_t Htarg = ptarget[7];
|
||||
uint32_t endiandata[20];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
keccakhash_4way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & 0xFFFFFF00 ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( ( ( hash7[ lane<<1 ] & 0xFFFFFF00 ) == 0 ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -11,13 +11,13 @@
|
||||
#if defined(KECCAK_4WAY)
|
||||
|
||||
void keccakhash_4way( void *state, const void *input );
|
||||
int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
void keccakhash( void *state, const void *input );
|
||||
int scanhash_keccak( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_keccak( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
@@ -44,7 +44,7 @@ extern "C"{
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#define SPH_SIZE_keccak256 256
|
||||
|
||||
|
@@ -18,14 +18,15 @@ void keccakhash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_keccak(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_keccak( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t endiandata[32];
|
||||
|
@@ -91,7 +91,7 @@ extern "C"{
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
static const sph_u64 RC[] = {
|
||||
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
|
||||
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
|
||||
@@ -106,7 +106,7 @@ static const sph_u64 RC[] = {
|
||||
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
|
||||
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
|
||||
};
|
||||
|
||||
*/
|
||||
#define kekDECL_STATE \
|
||||
sph_u64 keca00, keca01, keca02, keca03, keca04; \
|
||||
sph_u64 keca10, keca11, keca12, keca13, keca14; \
|
||||
@@ -756,6 +756,20 @@ static const sph_u64 RC[] = {
|
||||
* tested faster saving space
|
||||
*/
|
||||
#define KECCAK_F_1600_ do { \
|
||||
static const sph_u64 RC[] = { \
|
||||
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082), \
|
||||
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000), \
|
||||
SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001), \
|
||||
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009), \
|
||||
SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088), \
|
||||
SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A), \
|
||||
SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B), \
|
||||
SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003), \
|
||||
SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080), \
|
||||
SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A), \
|
||||
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080), \
|
||||
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008) \
|
||||
}; \
|
||||
int j; \
|
||||
for (j = 0; j < 24; j += 4) { \
|
||||
KF_ELT( 0, 1, RC[j + 0]); \
|
||||
@@ -791,7 +805,7 @@ static const sph_u64 RC[] = {
|
||||
/* load initial constants */
|
||||
#define KEC_I
|
||||
|
||||
static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 };
|
||||
//static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 };
|
||||
/*
|
||||
unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
|
||||
*/
|
||||
@@ -799,6 +813,7 @@ static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0
|
||||
/* load hash for loop */
|
||||
#define KEC_U \
|
||||
do { \
|
||||
static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
|
||||
/*memcpy(hashbuf, hash, 64); */ \
|
||||
memcpy(hash + 64, keczword, 8); \
|
||||
} while (0);
|
||||
|
@@ -24,7 +24,7 @@
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
#define MASK _mm256_set_epi32( 0UL, 0UL, 0UL, 0xffffffffUL, \
|
||||
0UL, 0UL, 0UL, 0xffffffffUL )
|
||||
|
@@ -24,7 +24,7 @@
|
||||
|
||||
#include <immintrin.h>
|
||||
#include "algo/sha/sha3-defs.h"
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
/* The length of digests*/
|
||||
#define DIGEST_BIT_LEN_224 224
|
||||
|
@@ -20,7 +20,7 @@
|
||||
|
||||
#include <string.h>
|
||||
#include <emmintrin.h>
|
||||
#include "avxdefs.h"
|
||||
#include "simd-utils.h"
|
||||
#include "luffa_for_sse2.h"
|
||||
|
||||
#define MULT2(a0,a1) do \
|
||||
@@ -30,6 +30,19 @@
|
||||
a1 = _mm_or_si128( _mm_srli_si128(a1,4), _mm_slli_si128(b,12) ); \
|
||||
} while(0)
|
||||
|
||||
/*
|
||||
static inline __m256i mult2_avx2( a )
|
||||
{
|
||||
__m128 a0, a0, b;
|
||||
a0 = mm128_extractlo_256( a );
|
||||
a1 = mm128_extracthi_256( a );
|
||||
b = _mm_xor_si128( a0, _mm_shuffle_epi32( _mm_and_si128(a1,MASK), 16 ) );
|
||||
a0 = _mm_or_si128( _mm_srli_si128(b,4), _mm_slli_si128(a1,12) );
|
||||
a1 = _mm_or_si128( _mm_srli_si128(a1,4), _mm_slli_si128(b,12) );
|
||||
return mm256_concat_128( a1, a0 );
|
||||
}
|
||||
*/
|
||||
|
||||
#define STEP_PART(x,c,t)\
|
||||
SUBCRUMB(*x,*(x+1),*(x+2),*(x+3),*t);\
|
||||
SUBCRUMB(*(x+5),*(x+6),*(x+7),*(x+4),*t);\
|
||||
@@ -272,8 +285,8 @@ HashReturn update_luffa( hashState_luffa *state, const BitSequence *data,
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
{
|
||||
rnd512( state, mm_bswap_32( casti_m128i( data, 1 ) ),
|
||||
mm_bswap_32( casti_m128i( data, 0 ) ) );
|
||||
rnd512( state, mm128_bswap_32( casti_m128i( data, 1 ) ),
|
||||
mm128_bswap_32( casti_m128i( data, 0 ) ) );
|
||||
data += MSG_BLOCK_BYTE_LEN;
|
||||
}
|
||||
|
||||
@@ -282,7 +295,7 @@ HashReturn update_luffa( hashState_luffa *state, const BitSequence *data,
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// remaining data bytes
|
||||
casti_m128i( state->buffer, 0 ) = mm_bswap_32( cast_m128i( data ) );
|
||||
casti_m128i( state->buffer, 0 ) = mm128_bswap_32( cast_m128i( data ) );
|
||||
// padding of partial block
|
||||
casti_m128i( state->buffer, 1 ) =
|
||||
_mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
@@ -324,8 +337,8 @@ HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output,
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
{
|
||||
rnd512( state, mm_bswap_32( casti_m128i( data, 1 ) ),
|
||||
mm_bswap_32( casti_m128i( data, 0 ) ) );
|
||||
rnd512( state, mm128_bswap_32( casti_m128i( data, 1 ) ),
|
||||
mm128_bswap_32( casti_m128i( data, 0 ) ) );
|
||||
data += MSG_BLOCK_BYTE_LEN;
|
||||
}
|
||||
|
||||
@@ -334,7 +347,7 @@ HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output,
|
||||
{
|
||||
// padding of partial block
|
||||
rnd512( state, _mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ),
|
||||
mm_bswap_32( cast_m128i( data ) ) );
|
||||
mm128_bswap_32( cast_m128i( data ) ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -587,8 +600,8 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
_mm_store_si128((__m128i*)&hash[0], t[0]);
|
||||
_mm_store_si128((__m128i*)&hash[4], t[1]);
|
||||
|
||||
casti_m128i( b, 0 ) = mm_bswap_32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 1 ) = mm_bswap_32( casti_m128i( hash, 1 ) );
|
||||
casti_m128i( b, 0 ) = mm128_bswap_32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 1 ) = mm128_bswap_32( casti_m128i( hash, 1 ) );
|
||||
|
||||
rnd512( state, zero, zero );
|
||||
|
||||
@@ -609,8 +622,8 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
_mm_store_si128((__m128i*)&hash[0], t[0]);
|
||||
_mm_store_si128((__m128i*)&hash[4], t[1]);
|
||||
|
||||
casti_m128i( b, 2 ) = mm_bswap_32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 3 ) = mm_bswap_32( casti_m128i( hash, 1 ) );
|
||||
casti_m128i( b, 2 ) = mm128_bswap_32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 3 ) = mm128_bswap_32( casti_m128i( hash, 1 ) );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@@ -77,6 +77,24 @@ static const sph_u32 V_INIT[5][8] = {
|
||||
}
|
||||
};
|
||||
|
||||
#if SPH_LUFFA_PARALLEL
|
||||
|
||||
static const sph_u64 RCW010[8] = {
|
||||
SPH_C64(0xb6de10ed303994a6), SPH_C64(0x70f47aaec0e65299),
|
||||
SPH_C64(0x0707a3d46cc33a12), SPH_C64(0x1c1e8f51dc56983e),
|
||||
SPH_C64(0x707a3d451e00108f), SPH_C64(0xaeb285627800423d),
|
||||
SPH_C64(0xbaca15898f5b7882), SPH_C64(0x40a46f3e96e1db12)
|
||||
};
|
||||
|
||||
static const sph_u64 RCW014[8] = {
|
||||
SPH_C64(0x01685f3de0337818), SPH_C64(0x05a17cf4441ba90d),
|
||||
SPH_C64(0xbd09caca7f34d442), SPH_C64(0xf4272b289389217f),
|
||||
SPH_C64(0x144ae5cce5a8bce6), SPH_C64(0xfaa7ae2b5274baf4),
|
||||
SPH_C64(0x2e48f1c126889ba7), SPH_C64(0xb923c7049a226e9d)
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
static const sph_u32 RC00[8] = {
|
||||
SPH_C32(0x303994a6), SPH_C32(0xc0e65299),
|
||||
SPH_C32(0x6cc33a12), SPH_C32(0xdc56983e),
|
||||
@@ -105,20 +123,18 @@ static const sph_u32 RC14[8] = {
|
||||
SPH_C32(0x2e48f1c1), SPH_C32(0xb923c704)
|
||||
};
|
||||
|
||||
#if SPH_LUFFA_PARALLEL
|
||||
|
||||
static const sph_u64 RCW010[8] = {
|
||||
SPH_C64(0xb6de10ed303994a6), SPH_C64(0x70f47aaec0e65299),
|
||||
SPH_C64(0x0707a3d46cc33a12), SPH_C64(0x1c1e8f51dc56983e),
|
||||
SPH_C64(0x707a3d451e00108f), SPH_C64(0xaeb285627800423d),
|
||||
SPH_C64(0xbaca15898f5b7882), SPH_C64(0x40a46f3e96e1db12)
|
||||
static const sph_u32 RC30[8] = {
|
||||
SPH_C32(0xb213afa5), SPH_C32(0xc84ebe95),
|
||||
SPH_C32(0x4e608a22), SPH_C32(0x56d858fe),
|
||||
SPH_C32(0x343b138f), SPH_C32(0xd0ec4e3d),
|
||||
SPH_C32(0x2ceb4882), SPH_C32(0xb3ad2208)
|
||||
};
|
||||
|
||||
static const sph_u64 RCW014[8] = {
|
||||
SPH_C64(0x01685f3de0337818), SPH_C64(0x05a17cf4441ba90d),
|
||||
SPH_C64(0xbd09caca7f34d442), SPH_C64(0xf4272b289389217f),
|
||||
SPH_C64(0x144ae5cce5a8bce6), SPH_C64(0xfaa7ae2b5274baf4),
|
||||
SPH_C64(0x2e48f1c126889ba7), SPH_C64(0xb923c7049a226e9d)
|
||||
static const sph_u32 RC34[8] = {
|
||||
SPH_C32(0xe028c9bf), SPH_C32(0x44756f91),
|
||||
SPH_C32(0x7e8fce32), SPH_C32(0x956548be),
|
||||
SPH_C32(0xfe191be2), SPH_C32(0x3cb226e5),
|
||||
SPH_C32(0x5944a28e), SPH_C32(0xa1c4c355)
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -137,19 +153,6 @@ static const sph_u32 RC24[8] = {
|
||||
SPH_C32(0x36eda57f), SPH_C32(0x703aace7)
|
||||
};
|
||||
|
||||
static const sph_u32 RC30[8] = {
|
||||
SPH_C32(0xb213afa5), SPH_C32(0xc84ebe95),
|
||||
SPH_C32(0x4e608a22), SPH_C32(0x56d858fe),
|
||||
SPH_C32(0x343b138f), SPH_C32(0xd0ec4e3d),
|
||||
SPH_C32(0x2ceb4882), SPH_C32(0xb3ad2208)
|
||||
};
|
||||
|
||||
static const sph_u32 RC34[8] = {
|
||||
SPH_C32(0xe028c9bf), SPH_C32(0x44756f91),
|
||||
SPH_C32(0x7e8fce32), SPH_C32(0x956548be),
|
||||
SPH_C32(0xfe191be2), SPH_C32(0x3cb226e5),
|
||||
SPH_C32(0x5944a28e), SPH_C32(0xa1c4c355)
|
||||
};
|
||||
|
||||
#if SPH_LUFFA_PARALLEL
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
#include "allium-gate.h"
|
||||
#include "lyra2-gate.h"
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
|
||||
typedef struct {
|
||||
@@ -44,10 +44,11 @@ void allium_4way_hash( void *state, const void *input )
|
||||
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
blake256_4way_close( &ctx.blake, vhash32 );
|
||||
|
||||
mm256_reinterleave_4x64( vhash64, vhash32, 256 );
|
||||
rintrlv_4x32_4x64( vhash64, vhash32, 256 );
|
||||
keccak256_4way( &ctx.keccak, vhash64, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash64 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
||||
@@ -55,11 +56,11 @@ void allium_4way_hash( void *state, const void *input )
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*)hash0, 32 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*)hash1, 32 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*)hash2, 32 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*)hash3, 32 );
|
||||
|
||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||
@@ -67,73 +68,64 @@ void allium_4way_hash( void *state, const void *input )
|
||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
|
||||
mm256_interleave_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||
intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||
|
||||
skein256_4way( &ctx.skein, vhash64, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash64 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
update_and_final_groestl256( &ctx.groestl, hash0, hash0, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash1, hash1, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash2, hash2, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash3, hash3, 256 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+32, hash1, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+64, hash2, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 );
|
||||
}
|
||||
|
||||
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_allium_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake256_4way_init( &allium_4way_ctx.blake );
|
||||
blake256_4way( &allium_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
allium_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user