mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
5 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
bfd1c002f9 | ||
![]() |
9edc650042 | ||
![]() |
218cef337a | ||
![]() |
9ffce7bdb7 | ||
![]() |
c7efa50aad |
147
README.md
147
README.md
@@ -7,6 +7,11 @@ All of the code is believed to be open and free. If anyone has a
|
||||
claim to any of it post your case in the cpuminer-opt Bitcoin Talk forum
|
||||
or by email.
|
||||
|
||||
Miner programs are often flagged as malware by antivirus programs. This is
|
||||
a false positive, they are flagged simply because they are cryptocurrency
|
||||
miners. The source code is open for anyone to inspect. If you don't trust
|
||||
the software, don't use it.
|
||||
|
||||
https://bitcointalk.org/index.php?topic=1326803.0
|
||||
|
||||
mailto://jayddee246@gmail.com
|
||||
@@ -40,82 +45,84 @@ MacOS, OSx and Android are not supported.
|
||||
Supported Algorithms
|
||||
--------------------
|
||||
|
||||
allium Garlicoin
|
||||
anime Animecoin
|
||||
argon2 Argon2 coin (AR2)
|
||||
argon2d-crds Credits (CRDS)
|
||||
argon2d-dyn Dynamic (DYN)
|
||||
axiom Shabal-256 MemoHash
|
||||
allium Garlicoin
|
||||
anime Animecoin
|
||||
argon2 Argon2 coin (AR2)
|
||||
argon2d250 argon2d-crds, Credits (CRDS)
|
||||
argon2d500 argon2d-dyn, Dynamic (DYN)
|
||||
argon2d4096 argon2d-uis, Unitus, (UIS)
|
||||
axiom Shabal-256 MemoHash
|
||||
bastion
|
||||
blake Blake-256 (SFR)
|
||||
blakecoin blake256r8
|
||||
blake2s Blake-2 S
|
||||
bmw BMW 256
|
||||
c11 Chaincoin
|
||||
cryptolight Cryptonight-light
|
||||
cryptonight cryptonote, Monero (XMR)
|
||||
blake Blake-256 (SFR)
|
||||
blakecoin blake256r8
|
||||
blake2s Blake-2 S
|
||||
bmw BMW 256
|
||||
c11 Chaincoin
|
||||
cryptolight Cryptonight-light
|
||||
cryptonight
|
||||
cryptonightv7 Monero (XMR)
|
||||
decred
|
||||
deep Deepcoin (DCN)
|
||||
dmd-gr Diamond-Groestl
|
||||
drop Dropcoin
|
||||
fresh Fresh
|
||||
groestl Groestl coin
|
||||
heavy Heavy
|
||||
hmq1725 Espers
|
||||
hodl Hodlcoin
|
||||
jha Jackpotcoin
|
||||
keccak Maxcoin
|
||||
keccakc Creative coin
|
||||
lbry LBC, LBRY Credits
|
||||
luffa Luffa
|
||||
lyra2h Hppcoin
|
||||
lyra2re lyra2
|
||||
lyra2rev2 lyra2v2, Vertcoin
|
||||
lyra2z Zcoin (XZC)
|
||||
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
|
||||
m7m Magi (XMG)
|
||||
myr-gr Myriad-Groestl
|
||||
neoscrypt NeoScrypt(128, 2, 1)
|
||||
nist5 Nist5
|
||||
pentablake Pentablake
|
||||
phi1612 phi, LUX coin
|
||||
pluck Pluck:128 (Supcoin)
|
||||
polytimos Ninja
|
||||
quark Quark
|
||||
qubit Qubit
|
||||
scrypt scrypt(1024, 1, 1) (default)
|
||||
scrypt:N scrypt(N, 1, 1)
|
||||
deep Deepcoin (DCN)
|
||||
dmd-gr Diamond-Groestl
|
||||
drop Dropcoin
|
||||
fresh Fresh
|
||||
groestl Groestl coin
|
||||
heavy Heavy
|
||||
hmq1725 Espers
|
||||
hodl Hodlcoin
|
||||
jha Jackpotcoin
|
||||
keccak Maxcoin
|
||||
keccakc Creative coin
|
||||
lbry LBC, LBRY Credits
|
||||
luffa Luffa
|
||||
lyra2h Hppcoin
|
||||
lyra2re lyra2
|
||||
lyra2rev2 lyra2v2, Vertcoin
|
||||
lyra2z Zcoin (XZC)
|
||||
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
|
||||
m7m Magi (XMG)
|
||||
myr-gr Myriad-Groestl
|
||||
neoscrypt NeoScrypt(128, 2, 1)
|
||||
nist5 Nist5
|
||||
pentablake Pentablake
|
||||
phi1612 phi, LUX coin
|
||||
pluck Pluck:128 (Supcoin)
|
||||
polytimos Ninja
|
||||
quark Quark
|
||||
qubit Qubit
|
||||
scrypt scrypt(1024, 1, 1) (default)
|
||||
scrypt:N scrypt(N, 1, 1)
|
||||
scryptjane:nf
|
||||
sha256d Double SHA-256
|
||||
sha256t Triple SHA-256, Onecoin (OC)
|
||||
shavite3 Shavite3
|
||||
skein Skein+Sha (Skeincoin)
|
||||
skein2 Double Skein (Woodcoin)
|
||||
skunk Signatum (SIGT)
|
||||
timetravel Machinecoin (MAC)
|
||||
timetravel10 Bitcore
|
||||
tribus Denarius (DNR)
|
||||
vanilla blake256r8vnl (VCash)
|
||||
veltor (VLT)
|
||||
sha256d Double SHA-256
|
||||
sha256t Triple SHA-256, Onecoin (OC)
|
||||
shavite3 Shavite3
|
||||
skein Skein+Sha (Skeincoin)
|
||||
skein2 Double Skein (Woodcoin)
|
||||
skunk Signatum (SIGT)
|
||||
timetravel Machinecoin (MAC)
|
||||
timetravel10 Bitcore
|
||||
tribus Denarius (DNR)
|
||||
vanilla blake256r8vnl (VCash)
|
||||
veltor (VLT)
|
||||
whirlpool
|
||||
whirlpoolx
|
||||
x11 Dash
|
||||
x11evo Revolvercoin
|
||||
x11gost sib (SibCoin)
|
||||
x12 Galaxie Cash (GCH)
|
||||
x13 X13
|
||||
x13sm3 hsr (Hshare)
|
||||
x14 X14
|
||||
x15 X15
|
||||
x16r Ravencoin (RVN)
|
||||
x16s pigeoncoin (PGN)
|
||||
x11 Dash
|
||||
x11evo Revolvercoin
|
||||
x11gost sib (SibCoin)
|
||||
x12 Galaxie Cash (GCH)
|
||||
x13 X13
|
||||
x13sm3 hsr (Hshare)
|
||||
x14 X14
|
||||
x15 X15
|
||||
x16r Ravencoin (RVN)
|
||||
x16s pigeoncoin (PGN)
|
||||
x17
|
||||
xevan Bitsend (BSD)
|
||||
yescrypt Globalboost-Y (BSTY)
|
||||
yescryptr8 BitZeny (ZNY)
|
||||
yescryptr16 Yenten (YTN)
|
||||
yescryptr32 WAVI
|
||||
zr5 Ziftr
|
||||
xevan Bitsend (BSD)
|
||||
yescrypt Globalboost-Y (BSTY)
|
||||
yescryptr8 BitZeny (ZNY)
|
||||
yescryptr16 Yenten (YTN)
|
||||
yescryptr32 WAVI
|
||||
zr5 Ziftr
|
||||
|
||||
Errata
|
||||
------
|
||||
|
19
README.txt
19
README.txt
@@ -4,6 +4,11 @@ for Linux and Windows can be found in RELEASE_NOTES.
|
||||
cpuminer is a console program that is executed from a DOS command prompt.
|
||||
There is no GUI and no mouse support.
|
||||
|
||||
Miner programs are often flagged as malware by antivirus programs. This is
|
||||
a false positive, they are flagged simply because they are cryptocurrency
|
||||
miners. The source code is open for anyone to inspect. If you don't trust
|
||||
the software, don't use it.
|
||||
|
||||
Choose the exe that best matches you CPU's features or use trial and
|
||||
error to find the fastest one that doesn't crash. Pay attention to
|
||||
the features listed at cpuminer startup to ensure you are mining at
|
||||
@@ -16,14 +21,16 @@ AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
||||
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
||||
these CPUs. Some algos may crash the miner with an invalid instruction.
|
||||
Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
||||
Changes in v3.8.4 may have improved compatibility with some of these CPUs.
|
||||
|
||||
Exe name Compile flags Arch name
|
||||
|
||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
||||
cpuminer-aes-sse42.exe "-maes -msse4.2" Westmere
|
||||
cpuminer-aes-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
|
||||
cpuminer-avx2.exe "-march=core-avx2" Haswell...
|
||||
cpuminer-avx2-sha.exe "-march=core-avx2 -msha" Ryzen
|
||||
Exe name Compile flags Arch name
|
||||
|
||||
cpuminer-sse2.exe "-msse2" Core2, Nehalem
|
||||
cpuminer-aes-sse42.exe "-march=westmere" Westmere, Sandy-Ivybridge
|
||||
cpuminer-avx.exe "-march=corei7-avx" Sandy-Ivybridge
|
||||
cpuminer-avx2.exe "-march=core-avx2" Haswell, Sky-Kaby-Coffeelake
|
||||
cpuminer-avx2-sha.exe "-march=core-avx2 -msha" Ryzen
|
||||
|
||||
If you like this software feel free to donate:
|
||||
|
||||
|
@@ -13,11 +13,11 @@ Security warning
|
||||
----------------
|
||||
|
||||
Miner programs are often flagged as malware by antivirus programs. This is
|
||||
a false positive, they are flagged simply because they are miners. The source
|
||||
code is open for anyone to inspect. If you don't trust the software, don't use
|
||||
it.
|
||||
a false positive, they are flagged simply because they are cryptocurrency
|
||||
miners. The source code is open for anyone to inspect. If you don't trust
|
||||
the software, don't use it.
|
||||
|
||||
The cryptographic code has been taken from trusted sources but has been
|
||||
The cryptographic hashing code has been taken from trusted sources but has been
|
||||
modified for speed at the expense of accepted security practices. This
|
||||
code should not be imported into applications where secure cryptography is
|
||||
required.
|
||||
@@ -50,8 +50,7 @@ will give a clue as to the missing package.
|
||||
The following command should install everything you need on Debian based
|
||||
distributions such as Ubuntu:
|
||||
|
||||
sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev automake
|
||||
|
||||
sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev automake zlib1g-dev
|
||||
|
||||
build-essential (for Ubuntu, Development Tools package group on Fedora)
|
||||
automake
|
||||
@@ -81,12 +80,12 @@ cd cpuminer-opt-x.y.z
|
||||
Run ./build.sh to build on Linux or execute the following commands.
|
||||
|
||||
./autogen.sh
|
||||
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
make
|
||||
|
||||
Additional optional compile flags, add the following to CFLAGS to activate:
|
||||
|
||||
-DUSE_SPH_SHA
|
||||
-DUSE_SPH_SHA (deprecated)
|
||||
|
||||
SPH may give slightly better performance on algos that use sha256 when using
|
||||
openssl 1.0.1 or older. Openssl 1.0.2 adds AVX2 and 1.1 adds SHA and perform
|
||||
@@ -160,6 +159,40 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.8.8.1
|
||||
|
||||
Fixed x16r.
|
||||
Removed cryptonight variant check due to false positives.
|
||||
API displays hashrate before shares are submitted.
|
||||
|
||||
v3.8.8
|
||||
|
||||
Added cryptonightv7 for Monero.
|
||||
|
||||
v3.8.7.2
|
||||
|
||||
Fixed argon2d-dyn regression in v3.8.7.1.
|
||||
Changed compile options for aes-sse42 Windows build to -march=westmere
|
||||
|
||||
v3.8.7.1
|
||||
|
||||
Fixed argon2d-uis low difficulty rejects.
|
||||
Fixed argon2d aliases.
|
||||
|
||||
v3.8.7
|
||||
|
||||
Added argon2d4096 (alias argon2d-uis) for Unitus (UIS).
|
||||
argon2d-crds and argon2d-dyn renamed to argon2d250 and argon2d500 respectively.
|
||||
The old names are recognized as aliases.
|
||||
AVX512 is now supported for argon2d algos, Linux only.
|
||||
AVX is no longer a reported feature and an AVX Windows binary is no longer
|
||||
provided. Use AES-SSE42 build instead.
|
||||
|
||||
v3.8.6.1
|
||||
|
||||
Faster argon2d* AVX2.
|
||||
Untested AVX-512 for argon2d*, YMMV.
|
||||
|
||||
v3.8.6
|
||||
|
||||
Fixed argon2 regression in v3.8.5.
|
||||
|
69
aclocal.m4
vendored
69
aclocal.m4
vendored
@@ -1,6 +1,6 @@
|
||||
# generated automatically by aclocal 1.14.1 -*- Autoconf -*-
|
||||
# generated automatically by aclocal 1.15.1 -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to.
|
||||
If you have problems, you may need to regenerate the build system entirely.
|
||||
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
|
||||
|
||||
# Copyright (C) 2002-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2002-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -32,10 +32,10 @@ To do so, use the procedure documented by the package, typically 'autoreconf'.])
|
||||
# generated from the m4 files accompanying Automake X.Y.
|
||||
# (This private macro should not be called outside this file.)
|
||||
AC_DEFUN([AM_AUTOMAKE_VERSION],
|
||||
[am__api_version='1.14'
|
||||
[am__api_version='1.15'
|
||||
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
||||
dnl require some minimum version. Point them to the right macro.
|
||||
m4_if([$1], [1.14.1], [],
|
||||
m4_if([$1], [1.15.1], [],
|
||||
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
||||
])
|
||||
|
||||
@@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
|
||||
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
|
||||
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
|
||||
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
||||
[AM_AUTOMAKE_VERSION([1.14.1])dnl
|
||||
[AM_AUTOMAKE_VERSION([1.15.1])dnl
|
||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
|
||||
|
||||
# Figure out how to run the assembler. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -78,7 +78,7 @@ _AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl
|
||||
|
||||
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -123,15 +123,14 @@ _AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl
|
||||
# configured tree to be moved without reconfiguration.
|
||||
|
||||
AC_DEFUN([AM_AUX_DIR_EXPAND],
|
||||
[dnl Rely on autoconf to set up CDPATH properly.
|
||||
AC_PREREQ([2.50])dnl
|
||||
# expand $ac_aux_dir to an absolute path
|
||||
am_aux_dir=`cd $ac_aux_dir && pwd`
|
||||
[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
|
||||
# Expand $ac_aux_dir to an absolute path.
|
||||
am_aux_dir=`cd "$ac_aux_dir" && pwd`
|
||||
])
|
||||
|
||||
# AM_CONDITIONAL -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1997-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -162,7 +161,7 @@ AC_CONFIG_COMMANDS_PRE(
|
||||
Usually this means the macro was only invoked conditionally.]])
|
||||
fi])])
|
||||
|
||||
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -353,7 +352,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
|
||||
|
||||
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -429,7 +428,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
|
||||
# Do all the work for Automake. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -519,8 +518,8 @@ AC_REQUIRE([AC_PROG_MKDIR_P])dnl
|
||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
|
||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
||||
AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
|
||||
# We need awk for the "check" target. The system "awk" is bad on
|
||||
# some platforms.
|
||||
# We need awk for the "check" target (and possibly the TAP driver). The
|
||||
# system "awk" is bad on some platforms.
|
||||
AC_REQUIRE([AC_PROG_AWK])dnl
|
||||
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
|
||||
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
||||
@@ -593,7 +592,11 @@ to "yes", and re-run configure.
|
||||
END
|
||||
AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
|
||||
fi
|
||||
fi])
|
||||
fi
|
||||
dnl The trailing newline in this macro's definition is deliberate, for
|
||||
dnl backward compatibility and to allow trailing 'dnl'-style comments
|
||||
dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841.
|
||||
])
|
||||
|
||||
dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
|
||||
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
|
||||
@@ -622,7 +625,7 @@ for _am_header in $config_headers :; do
|
||||
done
|
||||
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -633,7 +636,7 @@ echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_co
|
||||
# Define $install_sh.
|
||||
AC_DEFUN([AM_PROG_INSTALL_SH],
|
||||
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||
if test x"${install_sh}" != xset; then
|
||||
if test x"${install_sh+set}" != xset; then
|
||||
case $am_aux_dir in
|
||||
*\ * | *\ *)
|
||||
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
|
||||
@@ -643,7 +646,7 @@ if test x"${install_sh}" != xset; then
|
||||
fi
|
||||
AC_SUBST([install_sh])])
|
||||
|
||||
# Copyright (C) 2003-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2003-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -665,7 +668,7 @@ AC_SUBST([am__leading_dot])])
|
||||
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
|
||||
# From Jim Meyering
|
||||
|
||||
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -700,7 +703,7 @@ AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
|
||||
|
||||
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -750,7 +753,7 @@ rm -f confinc confmf
|
||||
|
||||
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1997-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -789,7 +792,7 @@ fi
|
||||
|
||||
# Helper functions for option handling. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -818,7 +821,7 @@ AC_DEFUN([_AM_SET_OPTIONS],
|
||||
AC_DEFUN([_AM_IF_OPTION],
|
||||
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
|
||||
|
||||
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -865,7 +868,7 @@ AC_LANG_POP([C])])
|
||||
# For backward compatibility.
|
||||
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -884,7 +887,7 @@ AC_DEFUN([AM_RUN_LOG],
|
||||
|
||||
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -965,7 +968,7 @@ AC_CONFIG_COMMANDS_PRE(
|
||||
rm -f conftest.file
|
||||
])
|
||||
|
||||
# Copyright (C) 2009-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2009-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1025,7 +1028,7 @@ AC_SUBST([AM_BACKSLASH])dnl
|
||||
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
|
||||
])
|
||||
|
||||
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1053,7 +1056,7 @@ fi
|
||||
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
||||
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
||||
|
||||
# Copyright (C) 2006-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2006-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1072,7 +1075,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
|
||||
|
||||
# Check how to create a tarball. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2004-2013 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2004-2017 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
|
153
algo-gate-api.c
153
algo-gate-api.c
@@ -157,81 +157,83 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
|
||||
switch (algo)
|
||||
{
|
||||
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
|
||||
case ALGO_ANIME: register_anime_algo ( gate ); break;
|
||||
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
|
||||
case ALGO_ARGON2DCRDS: register_argon2d_crds_algo( gate ); break;
|
||||
case ALGO_ARGON2DDYN: register_argon2d_dyn_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
|
||||
case ALGO_ANIME: register_anime_algo ( gate ); break;
|
||||
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
|
||||
case ALGO_ARGON2D250: register_argon2d_crds_algo ( gate ); break;
|
||||
case ALGO_ARGON2D500: register_argon2d_dyn_algo ( gate ); break;
|
||||
case ALGO_ARGON2D4096: register_argon2d4096_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
|
||||
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
|
||||
case ALGO_C11: register_c11_algo ( gate ); break;
|
||||
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
|
||||
case ALGO_DECRED: register_decred_algo ( gate ); break;
|
||||
case ALGO_DEEP: register_deep_algo ( gate ); break;
|
||||
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
|
||||
case ALGO_DROP: register_drop_algo ( gate ); break;
|
||||
case ALGO_FRESH: register_fresh_algo ( gate ); break;
|
||||
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
|
||||
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
||||
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||
case ALGO_JHA: register_jha_algo ( gate ); break;
|
||||
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
||||
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
|
||||
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
||||
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
||||
case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break;
|
||||
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
||||
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
||||
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
||||
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
|
||||
case ALGO_NIST5: register_nist5_algo ( gate ); break;
|
||||
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
|
||||
case ALGO_PHI1612: register_phi1612_algo ( gate ); break;
|
||||
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
|
||||
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
|
||||
case ALGO_QUARK: register_quark_algo ( gate ); break;
|
||||
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
|
||||
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
|
||||
case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
|
||||
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
|
||||
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
|
||||
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
|
||||
case ALGO_SKEIN: register_skein_algo ( gate ); break;
|
||||
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
|
||||
case ALGO_SKUNK: register_skunk_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL10: register_timetravel10_algo( gate ); break;
|
||||
case ALGO_TRIBUS: register_tribus_algo ( gate ); break;
|
||||
case ALGO_VANILLA: register_vanilla_algo ( gate ); break;
|
||||
case ALGO_VELTOR: register_veltor_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break;
|
||||
case ALGO_X11: register_x11_algo ( gate ); break;
|
||||
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
|
||||
case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
|
||||
case ALGO_X12: register_x12_algo ( gate ); break;
|
||||
case ALGO_X13: register_x13_algo ( gate ); break;
|
||||
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
|
||||
case ALGO_X14: register_x14_algo ( gate ); break;
|
||||
case ALGO_X15: register_x15_algo ( gate ); break;
|
||||
case ALGO_X16R: register_x16r_algo ( gate ); break;
|
||||
case ALGO_X16S: register_x16s_algo ( gate ); break;
|
||||
case ALGO_X17: register_x17_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
|
||||
case ALGO_ZR5: register_zr5_algo ( gate ); break;
|
||||
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
|
||||
case ALGO_C11: register_c11_algo ( gate ); break;
|
||||
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHTV7:register_cryptonightv7_algo( gate ); break;
|
||||
case ALGO_DECRED: register_decred_algo ( gate ); break;
|
||||
case ALGO_DEEP: register_deep_algo ( gate ); break;
|
||||
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
|
||||
case ALGO_DROP: register_drop_algo ( gate ); break;
|
||||
case ALGO_FRESH: register_fresh_algo ( gate ); break;
|
||||
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
|
||||
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
||||
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||
case ALGO_JHA: register_jha_algo ( gate ); break;
|
||||
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
||||
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
|
||||
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
||||
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
||||
case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break;
|
||||
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
||||
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
||||
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
||||
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
|
||||
case ALGO_NIST5: register_nist5_algo ( gate ); break;
|
||||
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
|
||||
case ALGO_PHI1612: register_phi1612_algo ( gate ); break;
|
||||
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
|
||||
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
|
||||
case ALGO_QUARK: register_quark_algo ( gate ); break;
|
||||
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
|
||||
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
|
||||
case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
|
||||
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
|
||||
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
|
||||
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
|
||||
case ALGO_SKEIN: register_skein_algo ( gate ); break;
|
||||
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
|
||||
case ALGO_SKUNK: register_skunk_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL10: register_timetravel10_algo ( gate ); break;
|
||||
case ALGO_TRIBUS: register_tribus_algo ( gate ); break;
|
||||
case ALGO_VANILLA: register_vanilla_algo ( gate ); break;
|
||||
case ALGO_VELTOR: register_veltor_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break;
|
||||
case ALGO_X11: register_x11_algo ( gate ); break;
|
||||
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
|
||||
case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
|
||||
case ALGO_X12: register_x12_algo ( gate ); break;
|
||||
case ALGO_X13: register_x13_algo ( gate ); break;
|
||||
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
|
||||
case ALGO_X14: register_x14_algo ( gate ); break;
|
||||
case ALGO_X15: register_x15_algo ( gate ); break;
|
||||
case ALGO_X16R: register_x16r_algo ( gate ); break;
|
||||
case ALGO_X16S: register_x16s_algo ( gate ); break;
|
||||
case ALGO_X17: register_x17_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
|
||||
case ALGO_ZR5: register_zr5_algo ( gate ); break;
|
||||
default:
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
|
||||
return false;
|
||||
@@ -288,6 +290,9 @@ void exec_hash_function( int algo, void *output, const void *pdata )
|
||||
const char* const algo_alias_map[][2] =
|
||||
{
|
||||
// alias proper
|
||||
{ "argon2d-crds", "argon2d250" },
|
||||
{ "argon2d-dyn", "argon2d500" },
|
||||
{ "argon2d-uis", "argon2d4096" },
|
||||
{ "bitcore", "timetravel10" },
|
||||
{ "bitzeny", "yescryptr8" },
|
||||
{ "blake256r8", "blakecoin" },
|
||||
|
@@ -2,6 +2,8 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "miner.h"
|
||||
#include "avxdefs.h"
|
||||
#include "interleave.h"
|
||||
|
||||
/////////////////////////////
|
||||
////
|
||||
@@ -91,6 +93,7 @@ typedef uint32_t set_t;
|
||||
#define AVX_OPT 8
|
||||
#define AVX2_OPT 0x10
|
||||
#define SHA_OPT 0x20
|
||||
#define AVX512_OPT 0x40
|
||||
|
||||
// return set containing all elements from sets a & b
|
||||
inline set_t set_union ( set_t a, set_t b ) { return a | b; }
|
||||
|
@@ -79,7 +79,7 @@ int64_t argon2_get_max64 ()
|
||||
|
||||
bool register_argon2_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_argon2;
|
||||
gate->hash = (void*)&argon2hash;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
|
@@ -28,6 +28,7 @@ void argon2d_crds_hash( void *output, const void *input )
|
||||
context.lanes = 4; // Degree of Parallelism
|
||||
context.threads = 1; // Threads
|
||||
context.t_cost = 1; // Iterations
|
||||
context.version = ARGON2_VERSION_10;
|
||||
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
@@ -70,7 +71,8 @@ bool register_argon2d_crds_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_argon2d_crds;
|
||||
gate->hash = (void*)&argon2d_crds_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Dynamic
|
||||
@@ -96,6 +98,7 @@ void argon2d_dyn_hash( void *output, const void *input )
|
||||
context.lanes = 8; // Degree of Parallelism
|
||||
context.threads = 1; // Threads
|
||||
context.t_cost = 2; // Iterations
|
||||
context.version = ARGON2_VERSION_10;
|
||||
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
@@ -138,6 +141,58 @@ bool register_argon2d_dyn_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_argon2d_dyn;
|
||||
gate->hash = (void*)&argon2d_dyn_hash;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Unitus
|
||||
|
||||
int scanhash_argon2d4096( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
|
||||
uint32_t t_cost = 1; // 1 iteration
|
||||
uint32_t m_cost = 4096; // use 4MB
|
||||
uint32_t parallelism = 1; // 1 thread, 2 lanes
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], n );
|
||||
argon2d_hash_raw( t_cost, m_cost, parallelism, (char*) endiandata, 80,
|
||||
(char*) endiandata, 80, (char*) vhash, 32, ARGON2_VERSION_13 );
|
||||
if ( vhash[7] < Htarg && fulltest( vhash, ptarget ) )
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return true;
|
||||
}
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t get_max64_0x1ff() { return 0x1ff; }
|
||||
|
||||
bool register_argon2d4096_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_argon2d4096;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0x1ff;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
// Credits
|
||||
// Credits: version = 0x10, m_cost = 250.
|
||||
bool register_argon2d_crds_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d_crds_hash( void *state, const void *input );
|
||||
@@ -12,7 +12,7 @@ void argon2d_crds_hash( void *state, const void *input );
|
||||
int scanhash_argon2d_crds( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
// Dynamic
|
||||
// Dynamic: version = 0x10, m_cost = 500.
|
||||
bool register_argon2d_dyn_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d_dyn_hash( void *state, const void *input );
|
||||
@@ -21,5 +21,11 @@ int scanhash_argon2d_dyn( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
|
||||
// Unitus: version = 0x13, m_cost = 4096.
|
||||
bool register_argon2d4096_algo( algo_gate_t* gate );
|
||||
|
||||
int scanhash_argon2d4096( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -19,10 +19,6 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#include "argon2.h"
|
||||
#include "encoding.h"
|
||||
#include "core.h"
|
||||
@@ -31,6 +27,10 @@ const char *argon2_type2string(argon2_type type, int uppercase) {
|
||||
switch (type) {
|
||||
case Argon2_d:
|
||||
return uppercase ? "Argon2d" : "argon2d";
|
||||
case Argon2_i:
|
||||
return uppercase ? "Argon2i" : "argon2i";
|
||||
case Argon2_id:
|
||||
return uppercase ? "Argon2id" : "argon2id";
|
||||
}
|
||||
|
||||
return NULL;
|
||||
@@ -46,7 +46,7 @@ int argon2_ctx(argon2_context *context, argon2_type type) {
|
||||
return result;
|
||||
}
|
||||
|
||||
if (Argon2_d != type) {
|
||||
if (Argon2_d != type && Argon2_i != type && Argon2_id != type) {
|
||||
return ARGON2_INCORRECT_TYPE;
|
||||
}
|
||||
|
||||
@@ -62,18 +62,18 @@ int argon2_ctx(argon2_context *context, argon2_type type) {
|
||||
/* Ensure that all segments have equal length */
|
||||
memory_blocks = segment_length * (context->lanes * ARGON2_SYNC_POINTS);
|
||||
|
||||
instance.version = context->version;
|
||||
instance.memory = NULL;
|
||||
instance.passes = context->t_cost;
|
||||
instance.memory_blocks = memory_blocks;
|
||||
instance.segment_length = segment_length;
|
||||
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
|
||||
instance.lanes = context->lanes;
|
||||
instance.limit = 1;
|
||||
instance.threads = context->threads;
|
||||
instance.type = type;
|
||||
|
||||
if (instance.threads > instance.limit) {
|
||||
instance.threads = instance.limit;
|
||||
if (instance.threads > instance.lanes) {
|
||||
instance.threads = instance.lanes;
|
||||
}
|
||||
|
||||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||
@@ -101,7 +101,8 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt, const size_t saltlen,
|
||||
void *hash, const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen, argon2_type type){
|
||||
const size_t encodedlen, argon2_type type,
|
||||
const uint32_t version){
|
||||
|
||||
argon2_context context;
|
||||
int result;
|
||||
@@ -145,6 +146,7 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
context.version = version;
|
||||
|
||||
result = argon2_ctx(&context, type);
|
||||
|
||||
@@ -174,23 +176,69 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, const size_t hashlen,
|
||||
char *encoded, const size_t encodedlen,
|
||||
const uint32_t version) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_i,
|
||||
version );
|
||||
}
|
||||
|
||||
int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen,
|
||||
const uint32_t version ) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, NULL, 0, Argon2_i, version );
|
||||
}
|
||||
|
||||
int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, const size_t hashlen,
|
||||
char *encoded, const size_t encodedlen) {
|
||||
char *encoded, const size_t encodedlen,
|
||||
const uint32_t version ) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_d);
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_d,
|
||||
version );
|
||||
}
|
||||
|
||||
int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
const size_t saltlen, void *hash, const size_t hashlen,
|
||||
const uint32_t version ) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, NULL, 0, Argon2_d);
|
||||
hash, hashlen, NULL, 0, Argon2_d, version );
|
||||
}
|
||||
|
||||
int argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, const size_t hashlen,
|
||||
char *encoded, const size_t encodedlen,
|
||||
const uint32_t version ) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
NULL, hashlen, encoded, encodedlen, Argon2_id,
|
||||
version);
|
||||
}
|
||||
|
||||
int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen,
|
||||
const uint32_t version ) {
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, NULL, 0, Argon2_id, version );
|
||||
}
|
||||
|
||||
static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) {
|
||||
@@ -269,15 +317,33 @@ fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int argon2i_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
|
||||
|
||||
return argon2_verify(encoded, pwd, pwdlen, Argon2_i);
|
||||
}
|
||||
|
||||
int argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
|
||||
|
||||
return argon2_verify(encoded, pwd, pwdlen, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2id_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
|
||||
|
||||
return argon2_verify(encoded, pwd, pwdlen, Argon2_id);
|
||||
}
|
||||
|
||||
int argon2d_ctx(argon2_context *context) {
|
||||
return argon2_ctx(context, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2i_ctx(argon2_context *context) {
|
||||
return argon2_ctx(context, Argon2_i);
|
||||
}
|
||||
|
||||
int argon2id_ctx(argon2_context *context) {
|
||||
return argon2_ctx(context, Argon2_id);
|
||||
}
|
||||
|
||||
int argon2_verify_ctx(argon2_context *context, const char *hash,
|
||||
argon2_type type) {
|
||||
int ret = argon2_ctx(context, type);
|
||||
@@ -296,6 +362,14 @@ int argon2d_verify_ctx(argon2_context *context, const char *hash) {
|
||||
return argon2_verify_ctx(context, hash, Argon2_d);
|
||||
}
|
||||
|
||||
int argon2i_verify_ctx(argon2_context *context, const char *hash) {
|
||||
return argon2_verify_ctx(context, hash, Argon2_i);
|
||||
}
|
||||
|
||||
int argon2id_verify_ctx(argon2_context *context, const char *hash) {
|
||||
return argon2_verify_ctx(context, hash, Argon2_id);
|
||||
}
|
||||
|
||||
const char *argon2_error_message(int error_code) {
|
||||
switch (error_code) {
|
||||
case ARGON2_OK:
|
||||
@@ -374,307 +448,11 @@ const char *argon2_error_message(int error_code) {
|
||||
return "Unknown error code";
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism,
|
||||
uint32_t saltlen, uint32_t hashlen, argon2_type type) {
|
||||
return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) +
|
||||
numlen(t_cost) + numlen(m_cost) + numlen(parallelism) +
|
||||
b64len(saltlen) + b64len(hashlen);
|
||||
b64len(saltlen) + b64len(hashlen) + numlen(ARGON2_VERSION_NUMBER) + 1;
|
||||
}
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
///////////////////////////
|
||||
// Wolf's Additions
|
||||
///////////////////////////
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <pthread.h>
|
||||
#include <x86intrin.h>
|
||||
#include "../blake2/blake2.h"
|
||||
|
||||
typedef struct _Argon2d_Block
|
||||
{
|
||||
union
|
||||
{
|
||||
uint64_t data[1024 / 8] __attribute__((aligned(32)));
|
||||
__m128i dqwords[1024 / 16] __attribute__((aligned(32)));
|
||||
__m256i qqwords[1024 / 32] __attribute__((aligned(32)));
|
||||
};
|
||||
} Argon2d_Block;
|
||||
|
||||
typedef struct _Argon2ThreadData
|
||||
{
|
||||
Argon2d_Block *Matrix;
|
||||
uint32_t slice;
|
||||
uint32_t lane;
|
||||
} Argon2ThreadData;
|
||||
|
||||
#define SEGMENT_LENGTH (250U / (4U * 4U)) // memory_blocks / (context->lanes * ARGON2_SYNC_POINTS);
|
||||
#define LANE_LENGTH (SEGMENT_LENGTH * 4U) // segment_length * ARGON2_SYNC_POINTS;
|
||||
#define CONCURRENT_THREADS 4
|
||||
|
||||
static const uint64_t blake2b_IV[8] =
|
||||
{
|
||||
0x6A09E667F3BCC908ULL, 0xBB67AE8584CAA73BULL,
|
||||
0x3C6EF372FE94F82BULL, 0xA54FF53A5F1D36F1ULL,
|
||||
0x510E527FADE682D1ULL, 0x9B05688C2B3E6C1FULL,
|
||||
0x1F83D9ABFB41BD6BULL, 0x5BE0CD19137E2179ULL
|
||||
};
|
||||
|
||||
static const unsigned int blake2b_sigma[12][16] =
|
||||
{
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
|
||||
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
|
||||
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
|
||||
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
|
||||
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
|
||||
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
|
||||
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
|
||||
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
};
|
||||
|
||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||
|
||||
#define G(r, i, a, b, c, d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
|
||||
d = ROTL64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = ROTL64(b ^ c, 40); \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
|
||||
d = ROTL64(d ^ a, 48); \
|
||||
c = c + d; \
|
||||
b = ROTL64(b ^ c, 1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r, 0, v[0], v[4], v[8], v[12]); \
|
||||
G(r, 1, v[1], v[5], v[9], v[13]); \
|
||||
G(r, 2, v[2], v[6], v[10], v[14]); \
|
||||
G(r, 3, v[3], v[7], v[11], v[15]); \
|
||||
G(r, 4, v[0], v[5], v[10], v[15]); \
|
||||
G(r, 5, v[1], v[6], v[11], v[12]); \
|
||||
G(r, 6, v[2], v[7], v[8], v[13]); \
|
||||
G(r, 7, v[3], v[4], v[9], v[14]); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
void CompressBlock(uint64_t *h, const uint64_t *m, uint64_t t, uint64_t f)
|
||||
{
|
||||
uint64_t v[16];
|
||||
|
||||
int i;
|
||||
for(i = 0; i < 8; ++i) v[i] = h[i];
|
||||
|
||||
for(i = 8; i < 16; ++i) v[i] = blake2b_IV[i - 8];
|
||||
|
||||
v[12] ^= t;
|
||||
v[14] ^= f;
|
||||
|
||||
int r;
|
||||
for(r = 0; r < 12; ++r)
|
||||
{
|
||||
ROUND(r);
|
||||
}
|
||||
|
||||
for(i = 0; i < 8; ++i) h[i] ^= v[i] ^ v[i + 8];
|
||||
}
|
||||
|
||||
void Argon2dInitHash(void *HashOut, void *Input)
|
||||
{
|
||||
blake2b_state BlakeHash;
|
||||
uint32_t InBuf[64]; // Is only 50 uint32_t, but need more space for Blake2B
|
||||
|
||||
memset(InBuf, 0x00, 200);
|
||||
|
||||
InBuf[0] = 4UL; // Lanes
|
||||
InBuf[1] = 32UL; // Output Length
|
||||
InBuf[2] = 250UL; // Memory Cost
|
||||
InBuf[3] = 1UL; // Time Cost
|
||||
InBuf[4] = 16UL; // Argon2 Version Number
|
||||
InBuf[5] = 0UL; // Type
|
||||
InBuf[6] = 80UL; // Password Length
|
||||
|
||||
memcpy(InBuf + 7, Input, 80); // Password
|
||||
|
||||
InBuf[27] = 80UL; // Salt Length
|
||||
|
||||
memcpy(InBuf + 28, Input, 80); // Salt
|
||||
|
||||
InBuf[48] = 0UL; // Secret Length
|
||||
InBuf[49] = 0UL; // Associated Data Length
|
||||
|
||||
int i;
|
||||
for(i = 50; i < 64; ++i) InBuf[i] = 0UL;
|
||||
|
||||
uint64_t H[8];
|
||||
|
||||
for(i = 0; i < 8; ++i) H[i] = blake2b_IV[i];
|
||||
|
||||
H[0] ^= 0x0000000001010040;
|
||||
|
||||
CompressBlock(H, (uint64_t *)InBuf, 128ULL, 0ULL);
|
||||
CompressBlock(H, (uint64_t *)(InBuf + 32), 200ULL, 0xFFFFFFFFFFFFFFFFULL);
|
||||
|
||||
memcpy(HashOut, H, 64U);
|
||||
}
|
||||
|
||||
void Argon2dFillFirstBlocks(Argon2d_Block *Matrix, void *InitHash)
|
||||
{
|
||||
uint32_t lane;
|
||||
for(lane = 0; lane < 4; ++lane)
|
||||
{
|
||||
((uint32_t *)InitHash)[16] = 0;
|
||||
((uint32_t *)InitHash)[17] = lane;
|
||||
blake2b_long(Matrix[lane * LANE_LENGTH].data, 1024, InitHash, 72);
|
||||
((uint32_t *)InitHash)[16] |= 1;
|
||||
blake2b_long(Matrix[lane * LANE_LENGTH + 1].data, 1024, InitHash, 72);
|
||||
}
|
||||
}
|
||||
|
||||
#include "../blake2/blamka-round-opt.h"
|
||||
|
||||
void Argon2dFillSingleBlock(Argon2d_Block *State, Argon2d_Block *RefBlock, Argon2d_Block *NextBlock)
|
||||
{
|
||||
__m256i XY[32];
|
||||
|
||||
int i;
|
||||
for(i = 0; i < 32; ++i)
|
||||
XY[i] = State->qqwords[i] = _mm256_xor_si256(State->qqwords[i], RefBlock->qqwords[i]);
|
||||
|
||||
for(i = 0; i < 8; ++i)
|
||||
{
|
||||
BLAKE2_ROUND( State->dqwords[8 * i + 0], State->dqwords[8 * i + 1], State->dqwords[8 * i + 2], State->dqwords[8 * i + 3],
|
||||
State->dqwords[8 * i + 4], State->dqwords[8 * i + 5], State->dqwords[8 * i + 6], State->dqwords[8 * i + 7]);
|
||||
}
|
||||
|
||||
for(i = 0; i < 8; ++i)
|
||||
{
|
||||
BLAKE2_ROUND( State->dqwords[8 * 0 + i], State->dqwords[8 * 1 + i], State->dqwords[8 * 2 + i], State->dqwords[8 * 3 + i],
|
||||
State->dqwords[8 * 4 + i], State->dqwords[8 * 5 + i], State->dqwords[8 * 6 + i], State->dqwords[8 * 7 + i]);
|
||||
}
|
||||
|
||||
for(i = 0; i < 32; ++i)
|
||||
{
|
||||
State->qqwords[i] = _mm256_xor_si256(State->qqwords[i], XY[i]);
|
||||
_mm256_store_si256(NextBlock->qqwords + i, State->qqwords[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void FillSegment(Argon2d_Block *Matrix, uint32_t slice, uint32_t lane)
|
||||
{
|
||||
uint32_t startidx, prevoff, curoff;
|
||||
Argon2d_Block State;
|
||||
|
||||
startidx = (!slice) ? 2 : 0;
|
||||
curoff = lane * LANE_LENGTH + slice * SEGMENT_LENGTH + startidx;
|
||||
|
||||
//if(!(curoff % LANE_LENGTH)) prevoff = curoff + LANE_LENGTH - 1;
|
||||
//else prevoff = curoff - 1;
|
||||
|
||||
prevoff = (!(curoff % LANE_LENGTH)) ? curoff + LANE_LENGTH - 1 : curoff - 1;
|
||||
|
||||
memcpy(State.data, (Matrix + prevoff)->data, 1024);
|
||||
|
||||
int i;
|
||||
for(i = startidx; i < SEGMENT_LENGTH; ++i, ++curoff, ++prevoff)
|
||||
{
|
||||
if((curoff % LANE_LENGTH) == 1) prevoff = curoff - 1;
|
||||
|
||||
uint64_t pseudorand = Matrix[prevoff].data[0];
|
||||
uint64_t reflane = (!slice) ? lane : (pseudorand >> 32) & 3; // mod lanes
|
||||
|
||||
uint32_t index = i;
|
||||
bool samelane = reflane == lane;
|
||||
pseudorand &= 0xFFFFFFFFULL;
|
||||
uint32_t refareasize = ((reflane == lane) ? slice * SEGMENT_LENGTH + index - 1 : slice * SEGMENT_LENGTH + ((!index) ? -1 : 0));
|
||||
|
||||
|
||||
if(!slice) refareasize = index - 1;
|
||||
|
||||
uint64_t relativepos = (pseudorand & 0xFFFFFFFFULL);
|
||||
relativepos = relativepos * relativepos >> 32;
|
||||
relativepos = refareasize - 1 - (refareasize * relativepos >> 32);
|
||||
|
||||
uint32_t startpos = 0;
|
||||
|
||||
uint32_t abspos = (startpos + relativepos) % LANE_LENGTH;
|
||||
|
||||
uint32_t refidx = abspos;
|
||||
|
||||
Argon2dFillSingleBlock(&State, Matrix + (LANE_LENGTH * reflane + refidx), Matrix + curoff);
|
||||
}
|
||||
}
|
||||
|
||||
void *ThreadedSegmentFill(void *ThrData)
|
||||
{
|
||||
Argon2ThreadData *Data = (Argon2ThreadData *)ThrData;
|
||||
|
||||
FillSegment(Data->Matrix, Data->slice, Data->lane);
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
void Argon2dFillAllBlocks(Argon2d_Block *Matrix)
|
||||
{
|
||||
pthread_t ThrHandles[CONCURRENT_THREADS];
|
||||
Argon2ThreadData ThrData[CONCURRENT_THREADS];
|
||||
|
||||
int s;
|
||||
for(s = 0; s < 4; ++s)
|
||||
{
|
||||
// WARNING: Assumes CONCURRENT_THREADS == lanes == 4
|
||||
int l;
|
||||
for(l = 0; l < 4; ++l)
|
||||
{
|
||||
FillSegment(Matrix, s, l);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Argon2dFinalizeHash(void *OutputHash, Argon2d_Block *Matrix)
|
||||
{
|
||||
int l;
|
||||
for(l = 1; l < 4; ++l)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < 32; ++i)
|
||||
Matrix[LANE_LENGTH - 1].qqwords[i] = _mm256_xor_si256(Matrix[LANE_LENGTH - 1].qqwords[i], Matrix[LANE_LENGTH * l + (LANE_LENGTH - 1)].qqwords[i]);
|
||||
}
|
||||
|
||||
blake2b_long(OutputHash, 32, Matrix[LANE_LENGTH - 1].data, 1024);
|
||||
}
|
||||
|
||||
void WolfArgon2dPoWHash(void *Output, void *Matrix, const void *BlkHdr)
|
||||
{
|
||||
uint8_t tmp[72];
|
||||
|
||||
Argon2dInitHash(tmp, (uint8_t *)BlkHdr);
|
||||
|
||||
Argon2dFillFirstBlocks(Matrix, tmp);
|
||||
|
||||
Argon2dFillAllBlocks(Matrix);
|
||||
|
||||
Argon2dFinalizeHash((uint8_t *)Output, Matrix);
|
||||
}
|
||||
|
||||
void WolfArgon2dAllocateCtx(void **Matrix)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
*((Argon2d_Block **)Matrix) = (Argon2d_Block *)_aligned_malloc(32, sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
#else
|
||||
*((Argon2d_Block **)Matrix) = (Argon2d_Block *)malloc(sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
posix_memalign(Matrix, 32, sizeof(Argon2d_Block) * (SEGMENT_LENGTH << 4));
|
||||
#endif
|
||||
}
|
||||
|
||||
void WolfArgon2dFreeCtx(void *Matrix)
|
||||
{
|
||||
free(Matrix);
|
||||
}
|
||||
|
||||
#endif
|
||||
*/
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -29,10 +29,13 @@ extern "C" {
|
||||
/* Symbols visibility control */
|
||||
#ifdef A2_VISCTL
|
||||
#define ARGON2_PUBLIC __attribute__((visibility("default")))
|
||||
#define ARGON2_LOCAL __attribute__ ((visibility ("hidden")))
|
||||
#elif _MSC_VER
|
||||
#define ARGON2_PUBLIC __declspec(dllexport)
|
||||
#define ARGON2_LOCAL
|
||||
#else
|
||||
#define ARGON2_PUBLIC
|
||||
#define ARGON2_LOCAL
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -206,6 +209,8 @@ typedef struct Argon2_Context {
|
||||
uint32_t lanes; /* number of lanes */
|
||||
uint32_t threads; /* maximum number of threads */
|
||||
|
||||
uint32_t version; /* version number */
|
||||
|
||||
allocate_fptr allocate_cbk; /* pointer to memory allocator */
|
||||
deallocate_fptr free_cbk; /* pointer to memory deallocator */
|
||||
|
||||
@@ -214,9 +219,15 @@ typedef struct Argon2_Context {
|
||||
|
||||
/* Argon2 primitive type */
|
||||
typedef enum Argon2_type {
|
||||
Argon2_d = 0
|
||||
Argon2_d = 0,
|
||||
Argon2_i = 1,
|
||||
Argon2_id = 2
|
||||
} argon2_type;
|
||||
|
||||
/* Version of the algorithm */
|
||||
#define ARGON2_VERSION_10 0x10
|
||||
#define ARGON2_VERSION_13 0x13
|
||||
|
||||
/*
|
||||
* Function that gives the string representation of an argon2_type.
|
||||
* @param type The argon2_type that we want the string for
|
||||
@@ -233,8 +244,31 @@ ARGON2_PUBLIC const char *argon2_type2string(argon2_type type, int uppercase);
|
||||
ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type);
|
||||
|
||||
/**
|
||||
* Hashes a password with Argon2i, producing a raw hash by allocating memory at
|
||||
* @hash
|
||||
* Hashes a password with Argon2i, producing an encoded hash
|
||||
* @param t_cost Number of iterations
|
||||
* @param m_cost Sets memory usage to m_cost kibibytes
|
||||
* @param parallelism Number of threads and compute lanes
|
||||
* @param pwd Pointer to password
|
||||
* @param pwdlen Password size in bytes
|
||||
* @param salt Pointer to salt
|
||||
* @param saltlen Salt size in bytes
|
||||
* @param hashlen Desired length of the hash in bytes
|
||||
* @param encoded Buffer where to write the encoded hash
|
||||
* @param encodedlen Size of the buffer (thus max size of the encoded hash)
|
||||
* @pre Different parallelism levels will give different results
|
||||
* @pre Returns ARGON2_OK if successful
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2i_hash_encoded(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
const uint32_t parallelism,
|
||||
const void *pwd, const size_t pwdlen,
|
||||
const void *salt, const size_t saltlen,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen,
|
||||
const uint32_t version );
|
||||
|
||||
/**
|
||||
* Hashes a password with Argon2i, producing a raw hash at @hash
|
||||
* @param t_cost Number of iterations
|
||||
* @param m_cost Sets memory usage to m_cost kibibytes
|
||||
* @param parallelism Number of threads and compute lanes
|
||||
@@ -247,11 +281,12 @@ ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type);
|
||||
* @pre Different parallelism levels will give different results
|
||||
* @pre Returns ARGON2_OK if successful
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
ARGON2_PUBLIC int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen);
|
||||
const size_t hashlen,
|
||||
const uint32_t version );
|
||||
|
||||
ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
@@ -259,7 +294,32 @@ ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost,
|
||||
const void *pwd, const size_t pwdlen,
|
||||
const void *salt, const size_t saltlen,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen);
|
||||
const size_t encodedlen,
|
||||
const uint32_t version );
|
||||
|
||||
ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen,
|
||||
const uint32_t version );
|
||||
|
||||
ARGON2_PUBLIC int argon2id_hash_encoded(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
const uint32_t parallelism,
|
||||
const void *pwd, const size_t pwdlen,
|
||||
const void *salt, const size_t saltlen,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen,
|
||||
const uint32_t version );
|
||||
|
||||
ARGON2_PUBLIC int argon2id_hash_raw(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen,
|
||||
const uint32_t version );
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
@@ -267,7 +327,8 @@ ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen, char *encoded,
|
||||
const size_t encodedlen, argon2_type type);
|
||||
const size_t encodedlen, argon2_type type,
|
||||
const uint32_t version );
|
||||
|
||||
/**
|
||||
* Verifies a password against an encoded string
|
||||
@@ -276,9 +337,15 @@ ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
* @param pwd Pointer to password
|
||||
* @pre Returns ARGON2_OK if successful
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2i_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen);
|
||||
|
||||
ARGON2_PUBLIC int argon2d_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen);
|
||||
|
||||
ARGON2_PUBLIC int argon2id_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd,
|
||||
const size_t pwdlen, argon2_type type);
|
||||
@@ -293,6 +360,27 @@ ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd,
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_ctx(argon2_context *context);
|
||||
|
||||
/**
|
||||
* Argon2i: Version of Argon2 that picks memory blocks
|
||||
* independent on the password and salt. Good for side-channels,
|
||||
* but worse w.r.t. tradeoff attacks if only one pass is used.
|
||||
*****
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2i_ctx(argon2_context *context);
|
||||
|
||||
/**
|
||||
* Argon2id: Version of Argon2 where the first half-pass over memory is
|
||||
* password-independent, the rest are password-dependent (on the password and
|
||||
* salt). OK against side channels (they reduce to 1/2-pass Argon2i), and
|
||||
* better with w.r.t. tradeoff attacks (similar to Argon2d).
|
||||
*****
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2id_ctx(argon2_context *context);
|
||||
|
||||
/**
|
||||
* Verify if a given password is correct for Argon2d hashing
|
||||
* @param context Pointer to current Argon2 context
|
||||
@@ -302,6 +390,25 @@ ARGON2_PUBLIC int argon2d_ctx(argon2_context *context);
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2d_verify_ctx(argon2_context *context, const char *hash);
|
||||
|
||||
/**
|
||||
* Verify if a given password is correct for Argon2i hashing
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @param hash The password hash to verify. The length of the hash is
|
||||
* specified by the context outlen member
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2i_verify_ctx(argon2_context *context, const char *hash);
|
||||
|
||||
/**
|
||||
* Verify if a given password is correct for Argon2id hashing
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @param hash The password hash to verify. The length of the hash is
|
||||
* specified by the context outlen member
|
||||
* @return Zero if successful, a non zero error code otherwise
|
||||
*/
|
||||
ARGON2_PUBLIC int argon2id_verify_ctx(argon2_context *context,
|
||||
const char *hash);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_verify_ctx(argon2_context *context, const char *hash,
|
||||
argon2_type type);
|
||||
@@ -326,18 +433,6 @@ ARGON2_PUBLIC size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost,
|
||||
uint32_t parallelism, uint32_t saltlen,
|
||||
uint32_t hashlen, argon2_type type);
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
///////////////////////////
|
||||
// Wolf's Additions
|
||||
///////////////////////////
|
||||
|
||||
void WolfArgon2dPoWHash(void *Output, void *Matrix, const void *BlkHdr);
|
||||
void WolfArgon2dAllocateCtx(void **Matrix);
|
||||
void WolfArgon2dFreeCtx(void *Matrix);
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -25,7 +25,6 @@
|
||||
#endif
|
||||
#define VC_GE_2005(version) (version >= 1400)
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@@ -35,6 +34,10 @@
|
||||
#include "../blake2/blake2.h"
|
||||
#include "../blake2/blake2-impl.h"
|
||||
|
||||
#ifdef GENKAT
|
||||
#include "genkat.h"
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_attribute(optnone)
|
||||
#define NOT_OPTIMIZED __attribute__((optnone))
|
||||
@@ -131,7 +134,7 @@ void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) {
|
||||
}
|
||||
|
||||
/* Memory clear flag defaults to true. */
|
||||
int FLAG_clear_internal_memory = 1;
|
||||
int FLAG_clear_internal_memory = 0;
|
||||
void clear_internal_memory(void *v, size_t n) {
|
||||
if (FLAG_clear_internal_memory && v) {
|
||||
secure_wipe_memory(v, n);
|
||||
@@ -163,6 +166,10 @@ void finalize(const argon2_context *context, argon2_instance_t *instance) {
|
||||
clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
#ifdef GENKAT
|
||||
print_tag(context->out, context->outlen);
|
||||
#endif
|
||||
|
||||
free_memory(context, (uint8_t *)instance->memory,
|
||||
instance->memory_blocks, sizeof(block));
|
||||
}
|
||||
@@ -249,6 +256,9 @@ static int fill_memory_blocks_st(argon2_instance_t *instance) {
|
||||
fill_segment(instance, position);
|
||||
}
|
||||
}
|
||||
#ifdef GENKAT
|
||||
internal_kat(instance, r); /* Print all memory blocks */
|
||||
#endif
|
||||
}
|
||||
return ARGON2_OK;
|
||||
}
|
||||
@@ -331,6 +341,10 @@ static int fill_memory_blocks_mt(argon2_instance_t *instance) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef GENKAT
|
||||
internal_kat(instance, r); /* Print all memory blocks */
|
||||
#endif
|
||||
}
|
||||
|
||||
fail:
|
||||
@@ -530,7 +544,8 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
store32(&value, context->t_cost);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, ARGON2_VERSION_NUMBER);
|
||||
// store32(&value, ARGON2_VERSION_NUMBER);
|
||||
store32(&value, context->version);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, (uint32_t)type);
|
||||
@@ -538,7 +553,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
|
||||
store32(&value, context->pwdlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
|
||||
if (context->pwd != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
|
||||
context->pwdlen);
|
||||
@@ -548,7 +563,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
context->pwdlen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
store32(&value, context->saltlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
@@ -602,11 +617,14 @@ int initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
/* Hashing all inputs */
|
||||
initial_hash(blockhash, context, instance->type);
|
||||
/* Zeroing 8 extra bytes */
|
||||
|
||||
clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ARGON2_PREHASH_SEED_LENGTH -
|
||||
ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
#ifdef GENKAT
|
||||
initial_kat(blockhash, context, instance->type);
|
||||
#endif
|
||||
|
||||
/* 3. Creating first blocks, we always have at least two blocks in a slice
|
||||
*/
|
||||
fill_first_blocks(blockhash, instance);
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -25,12 +25,12 @@
|
||||
/**********************Argon2 internal constants*******************************/
|
||||
|
||||
enum argon2_core_constants {
|
||||
/* Version of the algorithm */
|
||||
ARGON2_VERSION_NUMBER = 0x10,
|
||||
/* Memory block size in bytes */
|
||||
ARGON2_BLOCK_SIZE = 1024,
|
||||
ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
|
||||
ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
|
||||
ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32,
|
||||
ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64,
|
||||
|
||||
/* Number of pseudo-random values generated by one call to Blake in Argon2i
|
||||
to
|
||||
@@ -76,7 +76,6 @@ typedef struct Argon2_instance_t {
|
||||
uint32_t segment_length;
|
||||
uint32_t lane_length;
|
||||
uint32_t lanes;
|
||||
uint32_t limit;
|
||||
uint32_t threads;
|
||||
argon2_type type;
|
||||
int print_internals; /* whether to print the memory blocks */
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -326,6 +326,10 @@ int decode_string(argon2_context *ctx, const char *str, argon2_type type) {
|
||||
CC("$");
|
||||
CC(type_string);
|
||||
|
||||
/* Reading the version number if the default is suppressed */
|
||||
ctx->version = ARGON2_VERSION_10;
|
||||
CC_opt("$v=", DECIMAL_U32(ctx->version));
|
||||
|
||||
CC("$m=");
|
||||
DECIMAL_U32(ctx->m_cost);
|
||||
CC(",t=");
|
||||
@@ -411,6 +415,9 @@ int encode_string(char *dst, size_t dst_len, argon2_context *ctx,
|
||||
SS("$");
|
||||
SS(type_string);
|
||||
|
||||
SS("$v=");
|
||||
SX(ctx->version);
|
||||
|
||||
SS("$m=");
|
||||
SX(ctx->m_cost);
|
||||
SS(",t=");
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -34,6 +34,117 @@
|
||||
* @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
|
||||
#if defined(__AVX512F__)
|
||||
|
||||
static void fill_block(__m512i *state, const block *ref_block,
|
||||
block *next_block, int with_xor) {
|
||||
__m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK];
|
||||
unsigned int i;
|
||||
|
||||
if (with_xor) {
|
||||
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm512_xor_si512(
|
||||
state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
|
||||
block_XY[i] = _mm512_xor_si512(
|
||||
state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i));
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm512_xor_si512(
|
||||
state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
|
||||
}
|
||||
}
|
||||
|
||||
BLAKE2_ROUND_1( state[ 0], state[ 1], state[ 2], state[ 3],
|
||||
state[ 4], state[ 5], state[ 6], state[ 7] );
|
||||
BLAKE2_ROUND_1( state[ 8], state[ 9], state[10], state[11],
|
||||
state[12], state[13], state[14], state[15] );
|
||||
|
||||
BLAKE2_ROUND_2( state[ 0], state[ 2], state[ 4], state[ 6],
|
||||
state[ 8], state[10], state[12], state[14] );
|
||||
BLAKE2_ROUND_2( state[ 1], state[ 3], state[ 5], state[ 7],
|
||||
state[ 9], state[11], state[13], state[15] );
|
||||
|
||||
/*
|
||||
for (i = 0; i < 2; ++i) {
|
||||
BLAKE2_ROUND_1(
|
||||
state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3],
|
||||
state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
BLAKE2_ROUND_2(
|
||||
state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i],
|
||||
state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]);
|
||||
}
|
||||
*/
|
||||
|
||||
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm512_xor_si512(state[i], block_XY[i]);
|
||||
_mm512_storeu_si512((__m512i *)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(__AVX2__)
|
||||
|
||||
static void fill_block(__m256i *state, const block *ref_block,
|
||||
block *next_block, int with_xor) {
|
||||
__m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
|
||||
unsigned int i;
|
||||
|
||||
if (with_xor) {
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
|
||||
block_XY[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
|
||||
}
|
||||
}
|
||||
|
||||
BLAKE2_ROUND_1( state[ 0], state[ 4], state[ 1], state[ 5],
|
||||
state[ 2], state[ 6], state[ 3], state[ 7] );
|
||||
BLAKE2_ROUND_1( state[ 8], state[12], state[ 9], state[13],
|
||||
state[10], state[14], state[11], state[15] );
|
||||
BLAKE2_ROUND_1( state[16], state[20], state[17], state[21],
|
||||
state[18], state[22], state[19], state[23] );
|
||||
BLAKE2_ROUND_1( state[24], state[28], state[25], state[29],
|
||||
state[26], state[30], state[27], state[31] );
|
||||
|
||||
BLAKE2_ROUND_2( state[ 0], state[ 4], state[ 8], state[12],
|
||||
state[16], state[20], state[24], state[28] );
|
||||
BLAKE2_ROUND_2( state[ 1], state[ 5], state[ 9], state[13],
|
||||
state[17], state[21], state[25], state[29] );
|
||||
BLAKE2_ROUND_2( state[ 2], state[ 6], state[10], state[14],
|
||||
state[18], state[22], state[26], state[30] );
|
||||
BLAKE2_ROUND_2( state[ 3], state[ 7], state[11], state[15],
|
||||
state[19], state[23], state[27], state[31] );
|
||||
|
||||
/*
|
||||
for (i = 0; i < 4; ++i) {
|
||||
BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
|
||||
state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
|
||||
state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
|
||||
}
|
||||
*/
|
||||
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm256_xor_si256(state[i], block_XY[i]);
|
||||
_mm256_storeu_si256((__m256i *)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#else // SSE2
|
||||
|
||||
static void fill_block(__m128i *state, const block *ref_block,
|
||||
block *next_block, int with_xor) {
|
||||
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
|
||||
@@ -53,6 +164,41 @@ static void fill_block(__m128i *state, const block *ref_block,
|
||||
}
|
||||
}
|
||||
|
||||
BLAKE2_ROUND( state[ 0], state[ 1], state[ 2], state[ 3],
|
||||
state[ 4], state[ 5], state[ 6], state[ 7] );
|
||||
BLAKE2_ROUND( state[ 8], state[ 9], state[10], state[11],
|
||||
state[12], state[13], state[14], state[15] );
|
||||
BLAKE2_ROUND( state[16], state[17], state[18], state[19],
|
||||
state[20], state[21], state[22], state[23] );
|
||||
BLAKE2_ROUND( state[24], state[25], state[26], state[27],
|
||||
state[28], state[29], state[30], state[31] );
|
||||
BLAKE2_ROUND( state[32], state[33], state[34], state[35],
|
||||
state[36], state[37], state[38], state[39] );
|
||||
BLAKE2_ROUND( state[40], state[41], state[42], state[43],
|
||||
state[44], state[45], state[46], state[47] );
|
||||
BLAKE2_ROUND( state[48], state[49], state[50], state[51],
|
||||
state[52], state[53], state[54], state[55] );
|
||||
BLAKE2_ROUND( state[56], state[57], state[58], state[59],
|
||||
state[60], state[61], state[62], state[63] );
|
||||
|
||||
BLAKE2_ROUND( state[ 0], state[ 8], state[16], state[24],
|
||||
state[32], state[40], state[48], state[56] );
|
||||
BLAKE2_ROUND( state[ 1], state[ 9], state[17], state[25],
|
||||
state[33], state[41], state[49], state[57] );
|
||||
BLAKE2_ROUND( state[ 2], state[10], state[18], state[26],
|
||||
state[34], state[42], state[50], state[58] );
|
||||
BLAKE2_ROUND( state[ 3], state[11], state[19], state[27],
|
||||
state[35], state[43], state[51], state[59] );
|
||||
BLAKE2_ROUND( state[ 4], state[12], state[20], state[28],
|
||||
state[36], state[44], state[52], state[60] );
|
||||
BLAKE2_ROUND( state[ 5], state[13], state[21], state[29],
|
||||
state[37], state[45], state[53], state[61] );
|
||||
BLAKE2_ROUND( state[ 6], state[14], state[22], state[30],
|
||||
state[38], state[46], state[54], state[62] );
|
||||
BLAKE2_ROUND( state[ 7], state[15], state[23], state[31],
|
||||
state[39], state[47], state[55], state[63] );
|
||||
|
||||
/*
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
|
||||
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
|
||||
@@ -64,17 +210,28 @@ static void fill_block(__m128i *state, const block *ref_block,
|
||||
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
|
||||
state[8 * 6 + i], state[8 * 7 + i]);
|
||||
}
|
||||
|
||||
*/
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(state[i], block_XY[i]);
|
||||
_mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
static void next_addresses(block *address_block, block *input_block) {
|
||||
/*Temporary zero-initialized blocks*/
|
||||
#if defined(__AVX512F__)
|
||||
__m512i zero_block[ARGON2_512BIT_WORDS_IN_BLOCK];
|
||||
__m512i zero2_block[ARGON2_512BIT_WORDS_IN_BLOCK];
|
||||
#elif defined(__AVX2__)
|
||||
__m256i zero_block[ARGON2_HWORDS_IN_BLOCK];
|
||||
__m256i zero2_block[ARGON2_HWORDS_IN_BLOCK];
|
||||
#else
|
||||
__m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
|
||||
__m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];
|
||||
#endif
|
||||
|
||||
memset(zero_block, 0, sizeof(zero_block));
|
||||
memset(zero2_block, 0, sizeof(zero2_block));
|
||||
@@ -88,30 +245,53 @@ static void next_addresses(block *address_block, block *input_block) {
|
||||
/*Second iteration of G*/
|
||||
fill_block(zero2_block, address_block, address_block, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position) {
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
block address_block, input_block;
|
||||
// block address_block, input_block;
|
||||
uint64_t pseudo_rand, ref_index, ref_lane;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index, i;
|
||||
__m128i state[64];
|
||||
int data_independent_addressing;
|
||||
#if defined(__AVX512F__)
|
||||
__m512i state[ARGON2_512BIT_WORDS_IN_BLOCK];
|
||||
#elif defined(__AVX2__)
|
||||
__m256i state[ARGON2_HWORDS_IN_BLOCK];
|
||||
#else
|
||||
__m128i state[ARGON2_OWORDS_IN_BLOCK];
|
||||
#endif
|
||||
// int data_independent_addressing;
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
// data_independent_addressing =
|
||||
// (instance->type == Argon2_i) ||
|
||||
// (instance->type == Argon2_id && (position.pass == 0) &&
|
||||
// (position.slice < ARGON2_SYNC_POINTS / 2));
|
||||
|
||||
// if (data_independent_addressing) {
|
||||
// init_block_value(&input_block, 0);
|
||||
|
||||
// input_block.v[0] = position.pass;
|
||||
// input_block.v[1] = position.lane;
|
||||
// input_block.v[2] = position.slice;
|
||||
// input_block.v[3] = instance->memory_blocks;
|
||||
// input_block.v[4] = instance->passes;
|
||||
// input_block.v[5] = instance->type;
|
||||
// }
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
|
||||
/* Don't forget to generate the first block of addresses: */
|
||||
if (data_independent_addressing) {
|
||||
next_addresses(&address_block, &input_block);
|
||||
}
|
||||
// if (data_independent_addressing) {
|
||||
// next_addresses(&address_block, &input_block);
|
||||
// }
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
@@ -137,14 +317,14 @@ void fill_segment(const argon2_instance_t *instance,
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
if (data_independent_addressing) {
|
||||
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
|
||||
next_addresses(&address_block, &input_block);
|
||||
}
|
||||
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
|
||||
} else {
|
||||
// if (data_independent_addressing) {
|
||||
// if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
|
||||
// next_addresses(&address_block, &input_block);
|
||||
// }
|
||||
// pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
|
||||
// } else {
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
}
|
||||
// }
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
|
||||
@@ -165,8 +345,15 @@ void fill_segment(const argon2_instance_t *instance,
|
||||
ref_block =
|
||||
instance->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
|
||||
if (ARGON2_VERSION_10 == instance->version) {
|
||||
/* version 1.2.1 and earlier: overwrite, not XOR */
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
} else {
|
||||
if(0 == position.pass) {
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
} else {
|
||||
fill_block(state, ref_block, curr_block, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -46,7 +46,7 @@ typedef pthread_t argon2_thread_handle_t;
|
||||
* @param func A function pointer for the thread's entry point. Must not be
|
||||
* NULL.
|
||||
* @param args Pointer that is passed as an argument to @func. May be NULL.
|
||||
* @return 0 if @handle and @func are valid pointers and a thread is successfuly
|
||||
* @return 0 if @handle and @func are valid pointers and a thread is successfully
|
||||
* created.
|
||||
*/
|
||||
int argon2_thread_create(argon2_thread_handle_t *handle,
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -153,4 +153,4 @@ static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
|
||||
|
||||
void clear_internal_memory(void *v, size_t n);
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -78,7 +78,7 @@ int blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
|
||||
/* Simple API */
|
||||
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen);
|
||||
const void *key, size_t keylen);
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
@@ -88,4 +88,4 @@ int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -387,4 +387,4 @@ fail:
|
||||
return ret;
|
||||
#undef TRY
|
||||
}
|
||||
/* Argon2 Team - End Code */
|
||||
/* Argon2 Team - End Code */
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -29,6 +29,8 @@
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX512F__)
|
||||
#if !defined(__AVX2__)
|
||||
#if !defined(__XOP__)
|
||||
#if defined(__SSSE3__)
|
||||
#define r16 \
|
||||
@@ -176,5 +178,294 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
||||
\
|
||||
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
} while ((void)0, 0)
|
||||
#else /* __AVX2__ */
|
||||
|
||||
#endif
|
||||
#include <immintrin.h>
|
||||
|
||||
#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
|
||||
#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
||||
#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
|
||||
#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
|
||||
|
||||
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
||||
D0 = _mm256_xor_si256(D0, A0); \
|
||||
D0 = rotr32(D0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C0, D0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
||||
\
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B0 = rotr24(B0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(A1, B1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
||||
D1 = _mm256_xor_si256(D1, A1); \
|
||||
D1 = rotr32(D1); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C1, D1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
||||
\
|
||||
B1 = _mm256_xor_si256(B1, C1); \
|
||||
B1 = rotr24(B1); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
||||
D0 = _mm256_xor_si256(D0, A0); \
|
||||
D0 = rotr16(D0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C0, D0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B0 = rotr63(B0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(A1, B1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
||||
D1 = _mm256_xor_si256(D1, A1); \
|
||||
D1 = rotr16(D1); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C1, D1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
||||
B1 = _mm256_xor_si256(B1, C1); \
|
||||
B1 = rotr63(B1); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
\
|
||||
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
||||
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
||||
B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
\
|
||||
tmp1 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = tmp1; \
|
||||
\
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
||||
tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
} while(0);
|
||||
|
||||
#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
\
|
||||
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
||||
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
||||
B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
\
|
||||
tmp1 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = tmp1; \
|
||||
\
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
|
||||
tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do{ \
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
\
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do{ \
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
} while((void)0, 0);
|
||||
|
||||
#endif /* __AVX2__ */
|
||||
|
||||
#else /* __AVX512F__ */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#define ror64(x, n) _mm512_ror_epi64((x), (n))
|
||||
|
||||
static __m512i muladd(__m512i x, __m512i y)
|
||||
{
|
||||
__m512i z = _mm512_mul_epu32(x, y);
|
||||
return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
|
||||
}
|
||||
|
||||
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = muladd(A0, B0); \
|
||||
A1 = muladd(A1, B1); \
|
||||
\
|
||||
D0 = _mm512_xor_si512(D0, A0); \
|
||||
D1 = _mm512_xor_si512(D1, A1); \
|
||||
\
|
||||
D0 = ror64(D0, 32); \
|
||||
D1 = ror64(D1, 32); \
|
||||
\
|
||||
C0 = muladd(C0, D0); \
|
||||
C1 = muladd(C1, D1); \
|
||||
\
|
||||
B0 = _mm512_xor_si512(B0, C0); \
|
||||
B1 = _mm512_xor_si512(B1, C1); \
|
||||
\
|
||||
B0 = ror64(B0, 24); \
|
||||
B1 = ror64(B1, 24); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = muladd(A0, B0); \
|
||||
A1 = muladd(A1, B1); \
|
||||
\
|
||||
D0 = _mm512_xor_si512(D0, A0); \
|
||||
D1 = _mm512_xor_si512(D1, A1); \
|
||||
\
|
||||
D0 = ror64(D0, 16); \
|
||||
D1 = ror64(D1, 16); \
|
||||
\
|
||||
C0 = muladd(C0, D0); \
|
||||
C1 = muladd(C1, D1); \
|
||||
\
|
||||
B0 = _mm512_xor_si512(B0, C0); \
|
||||
B1 = _mm512_xor_si512(B1, C1); \
|
||||
\
|
||||
B0 = ror64(B0, 63); \
|
||||
B1 = ror64(B1, 63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
\
|
||||
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
\
|
||||
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
\
|
||||
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
\
|
||||
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define SWAP_HALVES(A0, A1) \
|
||||
do { \
|
||||
__m512i t0, t1; \
|
||||
t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
|
||||
t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
|
||||
A0 = t0; \
|
||||
A1 = t1; \
|
||||
} while((void)0, 0)
|
||||
|
||||
#define SWAP_QUARTERS(A0, A1) \
|
||||
do { \
|
||||
SWAP_HALVES(A0, A1); \
|
||||
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
|
||||
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
|
||||
} while((void)0, 0)
|
||||
|
||||
#define UNSWAP_QUARTERS(A0, A1) \
|
||||
do { \
|
||||
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
|
||||
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
|
||||
SWAP_HALVES(A0, A1); \
|
||||
} while((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
|
||||
do { \
|
||||
SWAP_HALVES(A0, B0); \
|
||||
SWAP_HALVES(C0, D0); \
|
||||
SWAP_HALVES(A1, B1); \
|
||||
SWAP_HALVES(C1, D1); \
|
||||
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
SWAP_HALVES(A0, B0); \
|
||||
SWAP_HALVES(C0, D0); \
|
||||
SWAP_HALVES(A1, B1); \
|
||||
SWAP_HALVES(C1, D1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
SWAP_QUARTERS(A0, A1); \
|
||||
SWAP_QUARTERS(B0, B1); \
|
||||
SWAP_QUARTERS(C0, C1); \
|
||||
SWAP_QUARTERS(D0, D1); \
|
||||
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
UNSWAP_QUARTERS(A0, A1); \
|
||||
UNSWAP_QUARTERS(B0, B1); \
|
||||
UNSWAP_QUARTERS(C0, C1); \
|
||||
UNSWAP_QUARTERS(D0, D1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#endif /* __AVX512F__ */
|
||||
#endif /* BLAKE_ROUND_MKA_OPT_H */
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* You may use this work under the terms of a Creative Commons CC0 1.0
|
||||
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
||||
* these licenses can be found at:
|
||||
*
|
||||
@@ -21,7 +21,7 @@
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
/*designed by the Lyra PHC team */
|
||||
/* designed by the Lyra PHC team */
|
||||
static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
|
||||
const uint64_t m = UINT64_C(0xFFFFFFFF);
|
||||
const uint64_t xy = (x & m) * (y & m);
|
||||
@@ -53,4 +53,4 @@ static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
|
||||
G(v3, v4, v9, v14); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -325,7 +325,7 @@ int scanhash_cryptolight(int thr_id, struct work *work,
|
||||
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#if defined(__AES__)
|
||||
do {
|
||||
*nonceptr = ++n;
|
||||
cryptolight_hash_ctx_aes_ni(hash, pdata, 76, ctx);
|
||||
|
@@ -1,14 +1,11 @@
|
||||
#if defined(__AES__)
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <memory.h>
|
||||
#include "cryptonight.h"
|
||||
#include "miner.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include <immintrin.h>
|
||||
//#include "avxdefs.h"
|
||||
|
||||
void aesni_parallel_noxor(uint8_t *long_state, uint8_t *text, uint8_t *ExpandedKey);
|
||||
void aesni_parallel_xor(uint8_t *text, uint8_t *ExpandedKey, uint8_t *long_state);
|
||||
void that_fucking_loop(uint8_t a[16], uint8_t b[16], uint8_t *long_state);
|
||||
|
||||
static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
|
||||
{
|
||||
@@ -25,7 +22,6 @@ static inline void ExpandAESKey256_sub1(__m128i *tmp1, __m128i *tmp2)
|
||||
|
||||
static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
__m128i tmp2, tmp4;
|
||||
|
||||
tmp4 = _mm_aeskeygenassist_si128(*tmp1, 0x00);
|
||||
@@ -37,14 +33,12 @@ static inline void ExpandAESKey256_sub2(__m128i *tmp1, __m128i *tmp3)
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp4);
|
||||
*tmp3 = _mm_xor_si128(*tmp3, tmp2);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Special thanks to Intel for helping me
|
||||
// with ExpandAESKey256() and its subroutines
|
||||
static inline void ExpandAESKey256(char *keybuf)
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
__m128i tmp1, tmp2, tmp3, *keys;
|
||||
|
||||
keys = (__m128i *)keybuf;
|
||||
@@ -91,7 +85,6 @@ static inline void ExpandAESKey256(char *keybuf)
|
||||
tmp2 = _mm_aeskeygenassist_si128(tmp3, 0x40);
|
||||
ExpandAESKey256_sub1(&tmp1, &tmp2);
|
||||
keys[14] = tmp1;
|
||||
#endif
|
||||
}
|
||||
|
||||
// align to 64 byte cache line
|
||||
@@ -109,13 +102,19 @@ static __thread cryptonight_ctx ctx;
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
|
||||
uint8_t ExpandedKey[256] __attribute__((aligned(64)));
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
size_t i, j;
|
||||
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
|
||||
if ( cryptonightV7 && len < 43 )
|
||||
return;
|
||||
|
||||
const uint64_t tweak = cryptonightV7
|
||||
? *((const uint64_t*) (((const uint8_t*)input) + 35))
|
||||
^ ctx.state.hs.w[24] : 0;
|
||||
|
||||
memcpy( ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||
@@ -214,7 +213,15 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
_mm_store_si128( (__m128i*)c, c_x );
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
_mm_store_si128( lsa, b_x );
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
@@ -227,10 +234,14 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
: "cc" );
|
||||
|
||||
b_x = c_x;
|
||||
nextblock[0] = a[0] + hi;
|
||||
nextblock[1] = a[1] + lo;
|
||||
a[0] = b[0] ^ nextblock[0];
|
||||
a[1] = b[1] ^ nextblock[1];
|
||||
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
nextblock[0] = a[0];
|
||||
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
|
||||
lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||
a_x = _mm_load_si128( (__m128i*)a );
|
||||
c_x = _mm_load_si128( lsa );
|
||||
@@ -241,6 +252,14 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
const uint8_t tmp = ( (const uint8_t*)(lsa) )[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
((uint8_t*)(lsa))[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
@@ -251,8 +270,12 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
"rm" ( b[0] )
|
||||
: "cc" );
|
||||
|
||||
nextblock[0] = a[0] + hi;
|
||||
nextblock[1] = a[1] + lo;
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
nextblock[0] = a[0];
|
||||
nextblock[1] = cryptonightV7 ? a[1] ^ tweak : a[1];
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
|
||||
memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
@@ -330,5 +353,5 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
@@ -7,11 +7,11 @@
|
||||
#include "cpuminer-config.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
#endif
|
||||
|
||||
#else
|
||||
#include "crypto/c_groestl.h"
|
||||
#endif
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
@@ -30,12 +30,12 @@ void do_blake_hash(const void* input, size_t len, char* output) {
|
||||
}
|
||||
|
||||
void do_groestl_hash(const void* input, size_t len, char* output) {
|
||||
#ifdef NO_AES_NI
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
#else
|
||||
#if defined(__AES__)
|
||||
hashState_groestl256 ctx;
|
||||
init_groestl256( &ctx, 32 );
|
||||
update_and_final_groestl256( &ctx, output, input, len * 8 );
|
||||
#else
|
||||
groestl(input, len * 8, (uint8_t*)output);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -52,23 +52,24 @@ void (* const extra_hashes[4])( const void *, size_t, char *) =
|
||||
|
||||
void cryptonight_hash( void *restrict output, const void *input, int len )
|
||||
{
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
cryptonight_hash_ctx ( output, input, len );
|
||||
#else
|
||||
#if defined(__AES__)
|
||||
cryptonight_hash_aes( output, input, len );
|
||||
#else
|
||||
cryptonight_hash_ctx ( output, input, len );
|
||||
#endif
|
||||
}
|
||||
|
||||
void cryptonight_hash_suw( void *restrict output, const void *input )
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
cryptonight_hash_ctx ( output, input, 76 );
|
||||
#else
|
||||
#if defined(__AES__)
|
||||
cryptonight_hash_aes( output, input, 76 );
|
||||
#else
|
||||
cryptonight_hash_ctx ( output, input, 76 );
|
||||
#endif
|
||||
}
|
||||
|
||||
bool cryptonightV7 = false;
|
||||
|
||||
int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
@@ -80,6 +81,11 @@ int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = n + 1;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t hash[32 / 4] __attribute__((aligned(32)));
|
||||
|
||||
// if ( ( cryptonightV7 && ( *(uint8_t*)pdata < 7 ) )
|
||||
// || ( !cryptonightV7 && ( *(uint8_t*)pdata == 7 ) ) )
|
||||
// applog(LOG_WARNING,"Cryptonight variant mismatch, shares may be rejected.");
|
||||
|
||||
do
|
||||
{
|
||||
*nonceptr = ++n;
|
||||
@@ -87,6 +93,7 @@ int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if (unlikely( hash[7] < Htarg ))
|
||||
{
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
// work_set_target_ratio( work, hash );
|
||||
return true;
|
||||
}
|
||||
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
|
||||
@@ -97,6 +104,7 @@ int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
bool register_cryptonight_algo( algo_gate_t* gate )
|
||||
{
|
||||
cryptonightV7 = false;
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
@@ -106,3 +114,15 @@ bool register_cryptonight_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
};
|
||||
|
||||
bool register_cryptonightv7_algo( algo_gate_t* gate )
|
||||
{
|
||||
cryptonightV7 = true;
|
||||
register_json_rpc2( gate );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_cryptonight;
|
||||
gate->hash = (void*)&cryptonight_hash;
|
||||
gate->hash_suw = (void*)&cryptonight_hash_suw;
|
||||
gate->get_max64 = (void*)&get_max64_0x40LL;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -20,8 +20,8 @@
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "crypto/int-util.h"
|
||||
#include "crypto/hash-ops.h"
|
||||
//#include "cryptonight.h"
|
||||
//#include "crypto/hash-ops.h"
|
||||
#include "cryptonight.h"
|
||||
|
||||
#if USE_INT128
|
||||
|
||||
@@ -51,6 +51,7 @@ typedef __uint128_t uint128_t;
|
||||
#define INIT_SIZE_BLK 8
|
||||
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
|
||||
|
||||
/*
|
||||
#pragma pack(push, 1)
|
||||
union cn_slow_hash_state {
|
||||
union hash_state hs;
|
||||
@@ -78,6 +79,7 @@ static void do_skein_hash(const void* input, size_t len, char* output) {
|
||||
int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
|
||||
assert(likely(SKEIN_SUCCESS == r));
|
||||
}
|
||||
*/
|
||||
|
||||
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
|
||||
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
|
||||
@@ -120,9 +122,11 @@ static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* pro
|
||||
extern uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi);
|
||||
#endif
|
||||
|
||||
/*
|
||||
static void (* const extra_hashes[4])(const void *, size_t, char *) = {
|
||||
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
|
||||
};
|
||||
*/
|
||||
|
||||
static inline size_t e2i(const uint8_t* a) {
|
||||
#if !LITE
|
||||
@@ -132,14 +136,16 @@ static inline size_t e2i(const uint8_t* a) {
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
|
||||
static inline void mul_sum_xor_dst( const uint8_t* a, uint8_t* c, uint8_t* dst,
|
||||
const uint64_t tweak )
|
||||
{
|
||||
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
|
||||
hi += ((uint64_t*) c)[0];
|
||||
|
||||
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
|
||||
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
|
||||
((uint64_t*) dst)[0] = hi;
|
||||
((uint64_t*) dst)[1] = lo;
|
||||
((uint64_t*) dst)[1] = cryptonightV7 ? lo ^ tweak : lo;
|
||||
}
|
||||
|
||||
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
|
||||
@@ -174,8 +180,16 @@ static __thread cryptonight_ctx ctx;
|
||||
|
||||
void cryptonight_hash_ctx(void* output, const void* input, int len)
|
||||
{
|
||||
hash_process(&ctx.state.hs, (const uint8_t*) input, len);
|
||||
ctx.aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
// hash_process(&ctx.state.hs, (const uint8_t*) input, len);
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
|
||||
if ( cryptonightV7 && len < 43 )
|
||||
return;
|
||||
const uint64_t tweak = cryptonightV7
|
||||
? *((const uint64_t*) (((const uint8_t*)input) + 35))
|
||||
^ ctx.state.hs.w[24] : 0;
|
||||
|
||||
ctx.aes_ctx = (oaes_ctx*) oaes_alloc();
|
||||
|
||||
__builtin_prefetch( ctx.text, 0, 3 );
|
||||
__builtin_prefetch( ctx.text + 64, 0, 3 );
|
||||
@@ -211,23 +225,44 @@ void cryptonight_hash_ctx(void* output, const void* input, int len)
|
||||
xor_blocks_dst(&ctx.state.k[0], &ctx.state.k[32], ctx.a);
|
||||
xor_blocks_dst(&ctx.state.k[16], &ctx.state.k[48], ctx.b);
|
||||
|
||||
for (i = 0; likely(i < ITER / 4); ++i) {
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.c, ctx.a);
|
||||
xor_blocks_dst(ctx.c, ctx.b, &ctx.long_state[j]);
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx.c, ctx.a, &ctx.long_state[e2i(ctx.c)]);
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.b, ctx.a);
|
||||
xor_blocks_dst(ctx.b, ctx.c, &ctx.long_state[j]);
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx.b, ctx.a, &ctx.long_state[e2i(ctx.b)]);
|
||||
for (i = 0; likely(i < ITER / 4); ++i)
|
||||
{
|
||||
/* Dependency chain: address -> read value ------+
|
||||
* written value <-+ hard function (AES or MUL) <+
|
||||
* next address <-+
|
||||
*/
|
||||
/* Iteration 1 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.c, ctx.a);
|
||||
xor_blocks_dst(ctx.c, ctx.b, &ctx.long_state[j]);
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
|
||||
const uint8_t tmp = lsa[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
/* Iteration 2 */
|
||||
mul_sum_xor_dst(ctx.c, ctx.a, &ctx.long_state[e2i(ctx.c)], tweak );
|
||||
|
||||
/* Iteration 3 */
|
||||
j = e2i(ctx.a);
|
||||
aesb_single_round(&ctx.long_state[j], ctx.b, ctx.a);
|
||||
xor_blocks_dst(ctx.b, ctx.c, &ctx.long_state[j]);
|
||||
|
||||
if ( cryptonightV7 )
|
||||
{
|
||||
uint8_t *lsa = (uint8_t*)&ctx.long_state[((uint64_t *)(ctx.a))[0] & 0x1FFFF0];
|
||||
const uint8_t tmp = lsa[11];
|
||||
const uint8_t index = ( ( (tmp >> 3) & 6 ) | (tmp & 1) ) << 1;
|
||||
lsa[11] = tmp ^ ( ( 0x75310 >> index) & 0x30 );
|
||||
}
|
||||
|
||||
/* Iteration 4 */
|
||||
mul_sum_xor_dst(ctx.b, ctx.a, &ctx.long_state[e2i(ctx.b)], tweak );
|
||||
|
||||
}
|
||||
|
||||
__builtin_prefetch( ctx.text, 0, 3 );
|
||||
@@ -266,7 +301,8 @@ void cryptonight_hash_ctx(void* output, const void* input, int len)
|
||||
aesb_pseudo_round_mut(&ctx.text[7 * AES_BLOCK_SIZE], ctx.aes_ctx->key->exp_data);
|
||||
}
|
||||
memcpy(ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||
hash_permutation(&ctx.state.hs);
|
||||
// hash_permutation(&ctx.state.hs);
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
/*memcpy(hash, &state, 32);*/
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
oaes_free((OAES_CTX **) &ctx.aes_ctx);
|
||||
|
@@ -45,5 +45,7 @@ int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len );
|
||||
|
||||
extern bool cryptonightV7;
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -67,7 +67,7 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
for ( int i=0; i < 20; i++ )
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
@@ -67,7 +67,7 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
for ( int i=0; i < 20; i++ )
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
@@ -60,7 +60,7 @@ int scanhash_sha256t_8way( int thr_id, struct work *work,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
for ( int k = 0; k < 19; k++ )
|
||||
for ( int k = 0; k < 20; k++ )
|
||||
be32enc( &edata[k], pdata[k] );
|
||||
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
|
@@ -31,11 +31,13 @@ int nextPerm( uint8_t n[], uint32_t count )
|
||||
return 0;
|
||||
|
||||
for ( i = count - 1; i>0 && n[i - 1] >= n[i]; i-- );
|
||||
tail = i;
|
||||
tail = i;
|
||||
|
||||
if ( tail > 0 )
|
||||
{
|
||||
for ( j = count - 1; j>tail && n[j] <= n[tail - 1]; j-- );
|
||||
evo_swap( &n[tail - 1], &n[j] );
|
||||
evo_swap( &n[tail - 1], &n[j] );
|
||||
}
|
||||
|
||||
for ( i = tail, j = count - 1; i<j; i++, j-- )
|
||||
evo_swap( &n[i], &n[j] );
|
||||
|
@@ -68,13 +68,7 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
uint32_t hash2[24] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[24] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[24*4] __attribute__ ((aligned (64)));
|
||||
// uint32_t inp0[24] __attribute__ ((aligned (64)));
|
||||
// uint32_t inp1[24] __attribute__ ((aligned (64)));
|
||||
// uint32_t inp2[24] __attribute__ ((aligned (64)));
|
||||
// uint32_t inp3[24] __attribute__ ((aligned (64)));
|
||||
|
||||
x16r_4way_ctx_holder ctx;
|
||||
|
||||
void *in0 = (void*) hash0;
|
||||
void *in1 = (void*) hash1;
|
||||
void *in2 = (void*) hash2;
|
||||
@@ -290,10 +284,6 @@ void x16r_4way_hash( void* output, const void* input )
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
break;
|
||||
}
|
||||
// in0 = (void*) hash0;
|
||||
// in1 = (void*) hash1;
|
||||
// in2 = (void*) hash2;
|
||||
// in3 = (void*) hash3;
|
||||
size = 64;
|
||||
}
|
||||
memcpy( output, hash0, 32 );
|
||||
@@ -325,7 +315,7 @@ int scanhash_x16r_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( s_ntime != endiandata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const char*) (&endiandata[1]), hashOrder );
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include "x16r-gate.h"
|
||||
|
||||
void x16r_getAlgoString( const char* prevblock, char *output )
|
||||
void x16r_getAlgoString( const uint8_t* prevblock, char *output )
|
||||
{
|
||||
char *sptr = output;
|
||||
for ( int j = 0; j < X16R_HASH_FUNC_COUNT; j++ )
|
||||
@@ -16,14 +16,13 @@ void x16r_getAlgoString( const char* prevblock, char *output )
|
||||
*sptr = '\0';
|
||||
}
|
||||
|
||||
void x16s_getAlgoString( const char* prevblock, char *output )
|
||||
void x16s_getAlgoString( const uint8_t* prevblock, char *output )
|
||||
{
|
||||
uint8_t* data = (uint8_t*)prevblock;
|
||||
strcpy( output, "0123456789ABCDEF" );
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
uint8_t b = (15 - i) >> 1; // 16 ascii hex chars, reversed
|
||||
uint8_t algoDigit = (i & 1) ? data[b] & 0xF : data[b] >> 4;
|
||||
uint8_t algoDigit = (i & 1) ? prevblock[b] & 0xF : prevblock[b] >> 4;
|
||||
int offset = algoDigit;
|
||||
// insert the nth character at the front
|
||||
char oldVal = output[offset];
|
||||
|
@@ -29,9 +29,9 @@ enum x16r_Algo {
|
||||
X16R_HASH_FUNC_COUNT
|
||||
};
|
||||
|
||||
bool (*x16_r_s_getAlgoString) ( const char*, char* );
|
||||
void x16r_getAlgoString( const char* prevblock, char *output );
|
||||
void x16s_getAlgoString( const char* prevblock, char *output );
|
||||
void (*x16_r_s_getAlgoString) ( const uint8_t*, char* );
|
||||
void x16r_getAlgoString( const uint8_t* prevblock, char *output );
|
||||
void x16s_getAlgoString( const uint8_t* prevblock, char *output );
|
||||
|
||||
bool register_x16r_algo( algo_gate_t* gate );
|
||||
bool register_x16s_algo( algo_gate_t* gate );
|
||||
|
@@ -205,7 +205,7 @@ int scanhash_x16r( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( s_ntime != pdata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const char*) (&endiandata[1]), hashOrder );
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&endiandata[1]), hashOrder );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime );
|
||||
|
773
avxdefs.h
773
avxdefs.h
@@ -1,5 +1,5 @@
|
||||
#ifndef AVXDEFS_H__
|
||||
#define AVXDEFS_H__
|
||||
#define AVXDEFS_H__ 1
|
||||
|
||||
// Some tools to help using SIMD vectors.
|
||||
//
|
||||
@@ -156,7 +156,7 @@ static inline __m128i foo()
|
||||
// _mm_setzero_si128 uses pxor instruction, it's unclear what _mm_set_epi does.
|
||||
// If a pseudo constant is used repeatedly in a function it may be worthwhile
|
||||
// to define a register variable to represent that constant.
|
||||
// register __m128i zero = mm_zero;
|
||||
// register __m128i zero = mm_setzero_si128();
|
||||
|
||||
// Constant zero
|
||||
#define m128_zero _mm_setzero_si128()
|
||||
@@ -1018,7 +1018,8 @@ inline __m256i mm256_aesenc_nokey_2x128_obs( __m256i x )
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI__)
|
||||
#if defined(__AVX512F__)
|
||||
//#if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI__)
|
||||
|
||||
// Experimental, not tested.
|
||||
|
||||
@@ -1034,7 +1035,12 @@ inline __m256i mm256_aesenc_nokey_2x128_obs( __m256i x )
|
||||
//
|
||||
// Pseudo constants.
|
||||
|
||||
#define m512_zero _mm512_setzero_si512()
|
||||
// _mm512_setzero_si512 uses xor instruction. If needed frequently
|
||||
// in a function it's better to define a register variable (const?)
|
||||
// initialized to zero.
|
||||
// It isn't clear to me yet how set or set1 work.
|
||||
|
||||
#define m512_zero _mm512_setzero_si512()
|
||||
#define m512_one_512 _mm512_set_epi64x( 0ULL, 0ULL, 0ULL, 0ULL, \
|
||||
0ULL, 0ULL, 0ULL, 1ULL )
|
||||
#define m512_one_256 _mm512_set4_epi64x( 0ULL, 0ULL, 0ULL, 1ULL )
|
||||
@@ -1058,6 +1064,21 @@ inline __m256i mm256_aesenc_nokey_2x128_obs( __m256i x )
|
||||
//
|
||||
// Pointer casting
|
||||
|
||||
// p = any aligned pointer
|
||||
// i = scaled array index
|
||||
// o = scaled address offset
|
||||
|
||||
// returns p as pointer to vector
|
||||
#define castp_m512i(p) ((__m512i*)(p))
|
||||
|
||||
// returns *p as vector value
|
||||
#define cast_m512i(p) (*((__m512i*)(p)))
|
||||
|
||||
// returns p[i] as vector value
|
||||
#define casti_m512i(p,i) (((__m512i*)(p))[(i)])
|
||||
|
||||
// returns p+o as pointer to vector
|
||||
#define casto_m512i(p,o) (((__m512i*)(p))+(o))
|
||||
|
||||
//
|
||||
// Memory functions
|
||||
@@ -1237,746 +1258,4 @@ inline __m256i mm256_aesenc_nokey_2x128_obs( __m256i x )
|
||||
|
||||
#endif // AVX512F
|
||||
|
||||
// Paired functions for interleaving and deinterleaving data for vector
|
||||
// processing.
|
||||
// Size is specfied in bits regardless of vector size to avoid pointer
|
||||
// arithmetic confusion with different size vectors and be consistent with
|
||||
// the function's name.
|
||||
//
|
||||
// Each function has 2 implementations, an optimized version that uses
|
||||
// vector indexing and a slower version that uses pointers. The optimized
|
||||
// version can only be used with 64 bit elements and only supports sizes
|
||||
// of 256, 512 or 640 bits, 32, 64, and 80 bytes respectively.
|
||||
//
|
||||
// NOTE: Contrary to GCC documentation, accessing vector elements using array
|
||||
// indexes only works with 64 bit elements.
|
||||
// Interleaving and deinterleaving of vectors of 32 bit elements
|
||||
// must use the slower implementations that don't use vector indexing.
|
||||
//
|
||||
// All data must be aligned to 256 bits for AVX2, or 128 bits for AVX.
|
||||
// Interleave source args and deinterleave destination args are not required
|
||||
// to be contiguous in memory but it's more efficient if they are.
|
||||
// Interleave source agrs may be the same actual arg repeated.
|
||||
// 640 bit deinterleaving 4x64 using 256 bit AVX2 requires the
|
||||
// destination buffers be defined with padding up to 768 bits for overrun
|
||||
// space. Although overrun space use is non destructive it should not overlay
|
||||
// useful data and should be ignored by the caller.
|
||||
|
||||
// SSE2 AVX
|
||||
|
||||
// interleave 4 arrays of 32 bit elements for 128 bit processing
|
||||
// bit_len must be 256, 512 or 640 bits.
|
||||
static inline void mm_interleave_4x32( void *dst, const void *src0,
|
||||
const void *src1, const void *src2, const void *src3, int bit_len )
|
||||
{
|
||||
uint32_t *s0 = (uint32_t*)src0;
|
||||
uint32_t *s1 = (uint32_t*)src1;
|
||||
uint32_t *s2 = (uint32_t*)src2;
|
||||
uint32_t *s3 = (uint32_t*)src3;
|
||||
__m128i* d = (__m128i*)dst;
|
||||
|
||||
d[0] = _mm_set_epi32( s3[ 0], s2[ 0], s1[ 0], s0[ 0] );
|
||||
d[1] = _mm_set_epi32( s3[ 1], s2[ 1], s1[ 1], s0[ 1] );
|
||||
d[2] = _mm_set_epi32( s3[ 2], s2[ 2], s1[ 2], s0[ 2] );
|
||||
d[3] = _mm_set_epi32( s3[ 3], s2[ 3], s1[ 3], s0[ 3] );
|
||||
d[4] = _mm_set_epi32( s3[ 4], s2[ 4], s1[ 4], s0[ 4] );
|
||||
d[5] = _mm_set_epi32( s3[ 5], s2[ 5], s1[ 5], s0[ 5] );
|
||||
d[6] = _mm_set_epi32( s3[ 6], s2[ 6], s1[ 6], s0[ 6] );
|
||||
d[7] = _mm_set_epi32( s3[ 7], s2[ 7], s1[ 7], s0[ 7] );
|
||||
|
||||
if ( bit_len <= 256 ) return;
|
||||
|
||||
d[ 8] = _mm_set_epi32( s3[ 8], s2[ 8], s1[ 8], s0[ 8] );
|
||||
d[ 9] = _mm_set_epi32( s3[ 9], s2[ 9], s1[ 9], s0[ 9] );
|
||||
d[10] = _mm_set_epi32( s3[10], s2[10], s1[10], s0[10] );
|
||||
d[11] = _mm_set_epi32( s3[11], s2[11], s1[11], s0[11] );
|
||||
d[12] = _mm_set_epi32( s3[12], s2[12], s1[12], s0[12] );
|
||||
d[13] = _mm_set_epi32( s3[13], s2[13], s1[13], s0[13] );
|
||||
d[14] = _mm_set_epi32( s3[14], s2[14], s1[14], s0[14] );
|
||||
d[15] = _mm_set_epi32( s3[15], s2[15], s1[15], s0[15] );
|
||||
|
||||
if ( bit_len <= 512 ) return;
|
||||
|
||||
d[16] = _mm_set_epi32( s3[16], s2[16], s1[16], s0[16] );
|
||||
d[17] = _mm_set_epi32( s3[17], s2[17], s1[17], s0[17] );
|
||||
d[18] = _mm_set_epi32( s3[18], s2[18], s1[18], s0[18] );
|
||||
d[19] = _mm_set_epi32( s3[19], s2[19], s1[19], s0[19] );
|
||||
|
||||
if ( bit_len <= 640 ) return;
|
||||
|
||||
d[20] = _mm_set_epi32( s3[20], s2[20], s1[20], s0[20] );
|
||||
d[21] = _mm_set_epi32( s3[21], s2[21], s1[21], s0[21] );
|
||||
d[22] = _mm_set_epi32( s3[22], s2[22], s1[22], s0[22] );
|
||||
d[23] = _mm_set_epi32( s3[23], s2[23], s1[23], s0[23] );
|
||||
|
||||
d[24] = _mm_set_epi32( s3[24], s2[24], s1[24], s0[24] );
|
||||
d[25] = _mm_set_epi32( s3[25], s2[25], s1[25], s0[25] );
|
||||
d[26] = _mm_set_epi32( s3[26], s2[26], s1[26], s0[26] );
|
||||
d[27] = _mm_set_epi32( s3[27], s2[27], s1[27], s0[27] );
|
||||
d[28] = _mm_set_epi32( s3[28], s2[28], s1[28], s0[28] );
|
||||
d[29] = _mm_set_epi32( s3[29], s2[29], s1[29], s0[29] );
|
||||
d[30] = _mm_set_epi32( s3[30], s2[30], s1[30], s0[30] );
|
||||
d[31] = _mm_set_epi32( s3[31], s2[31], s1[31], s0[31] );
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// bit_len must be multiple of 32
|
||||
static inline void mm_interleave_4x32x( void *dst, void *src0, void *src1,
|
||||
void *src2, void *src3, int bit_len )
|
||||
{
|
||||
uint32_t *d = (uint32_t*)dst;
|
||||
uint32_t *s0 = (uint32_t*)src0;
|
||||
uint32_t *s1 = (uint32_t*)src1;
|
||||
uint32_t *s2 = (uint32_t*)src2;
|
||||
uint32_t *s3 = (uint32_t*)src3;
|
||||
|
||||
for ( int i = 0; i < bit_len >> 5; i++, d += 4 )
|
||||
{
|
||||
*d = *(s0+i);
|
||||
*(d+1) = *(s1+i);
|
||||
*(d+2) = *(s2+i);
|
||||
*(d+3) = *(s3+i);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void mm_deinterleave_4x32( void *dst0, void *dst1, void *dst2,
|
||||
void *dst3, const void *src, int bit_len )
|
||||
{
|
||||
uint32_t *s = (uint32_t*)src;
|
||||
__m128i* d0 = (__m128i*)dst0;
|
||||
__m128i* d1 = (__m128i*)dst1;
|
||||
__m128i* d2 = (__m128i*)dst2;
|
||||
__m128i* d3 = (__m128i*)dst3;
|
||||
|
||||
d0[0] = _mm_set_epi32( s[12], s[ 8], s[ 4], s[ 0] );
|
||||
d1[0] = _mm_set_epi32( s[13], s[ 9], s[ 5], s[ 1] );
|
||||
d2[0] = _mm_set_epi32( s[14], s[10], s[ 6], s[ 2] );
|
||||
d3[0] = _mm_set_epi32( s[15], s[11], s[ 7], s[ 3] );
|
||||
|
||||
d0[1] = _mm_set_epi32( s[28], s[24], s[20], s[16] );
|
||||
d1[1] = _mm_set_epi32( s[29], s[25], s[21], s[17] );
|
||||
d2[1] = _mm_set_epi32( s[30], s[26], s[22], s[18] );
|
||||
d3[1] = _mm_set_epi32( s[31], s[27], s[23], s[19] );
|
||||
|
||||
if ( bit_len <= 256 ) return;
|
||||
|
||||
d0[2] = _mm_set_epi32( s[44], s[40], s[36], s[32] );
|
||||
d1[2] = _mm_set_epi32( s[45], s[41], s[37], s[33] );
|
||||
d2[2] = _mm_set_epi32( s[46], s[42], s[38], s[34] );
|
||||
d3[2] = _mm_set_epi32( s[47], s[43], s[39], s[35] );
|
||||
|
||||
d0[3] = _mm_set_epi32( s[60], s[56], s[52], s[48] );
|
||||
d1[3] = _mm_set_epi32( s[61], s[57], s[53], s[49] );
|
||||
d2[3] = _mm_set_epi32( s[62], s[58], s[54], s[50] );
|
||||
d3[3] = _mm_set_epi32( s[63], s[59], s[55], s[51] );
|
||||
|
||||
if ( bit_len <= 512 ) return;
|
||||
|
||||
d0[4] = _mm_set_epi32( s[76], s[72], s[68], s[64] );
|
||||
d1[4] = _mm_set_epi32( s[77], s[73], s[69], s[65] );
|
||||
d2[4] = _mm_set_epi32( s[78], s[74], s[70], s[66] );
|
||||
d3[4] = _mm_set_epi32( s[79], s[75], s[71], s[67] );
|
||||
|
||||
if ( bit_len <= 640 ) return;
|
||||
|
||||
d0[5] = _mm_set_epi32( s[92], s[88], s[84], s[80] );
|
||||
d1[5] = _mm_set_epi32( s[93], s[89], s[85], s[81] );
|
||||
d2[5] = _mm_set_epi32( s[94], s[90], s[86], s[82] );
|
||||
d3[5] = _mm_set_epi32( s[95], s[91], s[87], s[83] );
|
||||
|
||||
d0[6] = _mm_set_epi32( s[108], s[104], s[100], s[ 96] );
|
||||
d1[6] = _mm_set_epi32( s[109], s[105], s[101], s[ 97] );
|
||||
d2[6] = _mm_set_epi32( s[110], s[106], s[102], s[ 98] );
|
||||
d3[6] = _mm_set_epi32( s[111], s[107], s[103], s[ 99] );
|
||||
|
||||
d0[7] = _mm_set_epi32( s[124], s[120], s[116], s[112] );
|
||||
d1[7] = _mm_set_epi32( s[125], s[121], s[117], s[113] );
|
||||
d2[7] = _mm_set_epi32( s[126], s[122], s[118], s[114] );
|
||||
d3[7] = _mm_set_epi32( s[127], s[123], s[119], s[115] );
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// deinterleave 4 arrays into individual buffers for scalarm processing
|
||||
// bit_len must be multiple of 32
|
||||
static inline void mm_deinterleave_4x32x( void *dst0, void *dst1, void *dst2,
|
||||
void *dst3, const void *src, int bit_len )
|
||||
{
|
||||
uint32_t *s = (uint32_t*)src;
|
||||
uint32_t *d0 = (uint32_t*)dst0;
|
||||
uint32_t *d1 = (uint32_t*)dst1;
|
||||
uint32_t *d2 = (uint32_t*)dst2;
|
||||
uint32_t *d3 = (uint32_t*)dst3;
|
||||
|
||||
for ( int i = 0; i < bit_len >> 5; i++, s += 4 )
|
||||
{
|
||||
*(d0+i) = *s;
|
||||
*(d1+i) = *(s+1);
|
||||
*(d2+i) = *(s+2);
|
||||
*(d3+i) = *(s+3);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// Interleave 4 source buffers containing 64 bit data into the destination
|
||||
// buffer. Only bit_len 256, 512, 640 & 1024 are supported.
|
||||
static inline void mm256_interleave_4x64( void *dst, const void *src0,
|
||||
const void *src1, const void *src2, const void *src3, int bit_len )
|
||||
{
|
||||
__m256i* d = (__m256i*)dst;
|
||||
uint64_t *s0 = (uint64_t*)src0;
|
||||
uint64_t *s1 = (uint64_t*)src1;
|
||||
uint64_t *s2 = (uint64_t*)src2;
|
||||
uint64_t *s3 = (uint64_t*)src3;
|
||||
|
||||
d[0] = _mm256_set_epi64x( s3[0], s2[0], s1[0], s0[0] );
|
||||
d[1] = _mm256_set_epi64x( s3[1], s2[1], s1[1], s0[1] );
|
||||
d[2] = _mm256_set_epi64x( s3[2], s2[2], s1[2], s0[2] );
|
||||
d[3] = _mm256_set_epi64x( s3[3], s2[3], s1[3], s0[3] );
|
||||
|
||||
if ( bit_len <= 256 ) return;
|
||||
|
||||
d[4] = _mm256_set_epi64x( s3[4], s2[4], s1[4], s0[4] );
|
||||
d[5] = _mm256_set_epi64x( s3[5], s2[5], s1[5], s0[5] );
|
||||
d[6] = _mm256_set_epi64x( s3[6], s2[6], s1[6], s0[6] );
|
||||
d[7] = _mm256_set_epi64x( s3[7], s2[7], s1[7], s0[7] );
|
||||
|
||||
if ( bit_len <= 512 ) return;
|
||||
|
||||
d[8] = _mm256_set_epi64x( s3[8], s2[8], s1[8], s0[8] );
|
||||
d[9] = _mm256_set_epi64x( s3[9], s2[9], s1[9], s0[9] );
|
||||
|
||||
if ( bit_len <= 640 ) return;
|
||||
|
||||
d[10] = _mm256_set_epi64x( s3[10], s2[10], s1[10], s0[10] );
|
||||
d[11] = _mm256_set_epi64x( s3[11], s2[11], s1[11], s0[11] );
|
||||
|
||||
d[12] = _mm256_set_epi64x( s3[12], s2[12], s1[12], s0[12] );
|
||||
d[13] = _mm256_set_epi64x( s3[13], s2[13], s1[13], s0[13] );
|
||||
d[14] = _mm256_set_epi64x( s3[14], s2[14], s1[14], s0[14] );
|
||||
d[15] = _mm256_set_epi64x( s3[15], s2[15], s1[15], s0[15] );
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// Slower version
|
||||
// bit_len must be multiple of 64
|
||||
static inline void mm256_interleave_4x64x( void *dst, void *src0, void *src1,
|
||||
void *src2, void *src3, int bit_len )
|
||||
{
|
||||
uint64_t *d = (uint64_t*)dst;
|
||||
uint64_t *s0 = (uint64_t*)src0;
|
||||
uint64_t *s1 = (uint64_t*)src1;
|
||||
uint64_t *s2 = (uint64_t*)src2;
|
||||
uint64_t *s3 = (uint64_t*)src3;
|
||||
|
||||
for ( int i = 0; i < bit_len>>6; i++, d += 4 )
|
||||
{
|
||||
*d = *(s0+i);
|
||||
*(d+1) = *(s1+i);
|
||||
*(d+2) = *(s2+i);
|
||||
*(d+3) = *(s3+i);
|
||||
}
|
||||
}
|
||||
|
||||
// Deinterleave 4 buffers of 64 bit data from the source buffer.
|
||||
// bit_len must be 256, 512, 640 or 1024 bits.
|
||||
// Requires overrun padding for 640 bit len.
|
||||
static inline void mm256_deinterleave_4x64( void *dst0, void *dst1, void *dst2,
|
||||
void *dst3, const void *src, int bit_len )
|
||||
{
|
||||
__m256i* d0 = (__m256i*)dst0;
|
||||
__m256i* d1 = (__m256i*)dst1;
|
||||
__m256i* d2 = (__m256i*)dst2;
|
||||
__m256i* d3 = (__m256i*)dst3;
|
||||
uint64_t* s = (uint64_t*)src;
|
||||
|
||||
d0[0] = _mm256_set_epi64x( s[12], s[ 8], s[ 4], s[ 0] );
|
||||
d1[0] = _mm256_set_epi64x( s[13], s[ 9], s[ 5], s[ 1] );
|
||||
d2[0] = _mm256_set_epi64x( s[14], s[10], s[ 6], s[ 2] );
|
||||
d3[0] = _mm256_set_epi64x( s[15], s[11], s[ 7], s[ 3] );
|
||||
|
||||
if ( bit_len <= 256 ) return;
|
||||
|
||||
d0[1] = _mm256_set_epi64x( s[28], s[24], s[20], s[16] );
|
||||
d1[1] = _mm256_set_epi64x( s[29], s[25], s[21], s[17] );
|
||||
d2[1] = _mm256_set_epi64x( s[30], s[26], s[22], s[18] );
|
||||
d3[1] = _mm256_set_epi64x( s[31], s[27], s[23], s[19] );
|
||||
|
||||
if ( bit_len <= 512 ) return;
|
||||
|
||||
if ( bit_len <= 640 )
|
||||
{
|
||||
// null change to overrun area
|
||||
d0[2] = _mm256_set_epi64x( d0[2][3], d0[2][2], s[36], s[32] );
|
||||
d1[2] = _mm256_set_epi64x( d1[2][3], d1[2][2], s[37], s[33] );
|
||||
d2[2] = _mm256_set_epi64x( d2[2][3], d2[2][2], s[38], s[34] );
|
||||
d3[2] = _mm256_set_epi64x( d3[2][3], d3[2][2], s[39], s[35] );
|
||||
return;
|
||||
}
|
||||
|
||||
d0[2] = _mm256_set_epi64x( s[44], s[40], s[36], s[32] );
|
||||
d1[2] = _mm256_set_epi64x( s[45], s[41], s[37], s[33] );
|
||||
d2[2] = _mm256_set_epi64x( s[46], s[42], s[38], s[34] );
|
||||
d3[2] = _mm256_set_epi64x( s[47], s[43], s[39], s[35] );
|
||||
|
||||
d0[3] = _mm256_set_epi64x( s[60], s[56], s[52], s[48] );
|
||||
d1[3] = _mm256_set_epi64x( s[61], s[57], s[53], s[49] );
|
||||
d2[3] = _mm256_set_epi64x( s[62], s[58], s[54], s[50] );
|
||||
d3[3] = _mm256_set_epi64x( s[63], s[59], s[55], s[51] );
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// Slower version
|
||||
// bit_len must be multiple 0f 64
|
||||
static inline void mm256_deinterleave_4x64x( void *dst0, void *dst1,
|
||||
void *dst2, void *dst3, void *src, int bit_len )
|
||||
{
|
||||
uint64_t *s = (uint64_t*)src;
|
||||
uint64_t *d0 = (uint64_t*)dst0;
|
||||
uint64_t *d1 = (uint64_t*)dst1;
|
||||
uint64_t *d2 = (uint64_t*)dst2;
|
||||
uint64_t *d3 = (uint64_t*)dst3;
|
||||
|
||||
for ( int i = 0; i < bit_len>>6; i++, s += 4 )
|
||||
{
|
||||
*(d0+i) = *s;
|
||||
*(d1+i) = *(s+1);
|
||||
*(d2+i) = *(s+2);
|
||||
*(d3+i) = *(s+3);
|
||||
}
|
||||
}
|
||||
|
||||
// Interleave 8 source buffers containing 32 bit data into the destination
|
||||
// vector
|
||||
static inline void mm256_interleave_8x32( void *dst, const void *src0,
|
||||
const void *src1, const void *src2, const void *src3, const void *src4,
|
||||
const void *src5, const void *src6, const void *src7, int bit_len )
|
||||
{
|
||||
uint32_t *s0 = (uint32_t*)src0;
|
||||
uint32_t *s1 = (uint32_t*)src1;
|
||||
uint32_t *s2 = (uint32_t*)src2;
|
||||
uint32_t *s3 = (uint32_t*)src3;
|
||||
uint32_t *s4 = (uint32_t*)src4;
|
||||
uint32_t *s5 = (uint32_t*)src5;
|
||||
uint32_t *s6 = (uint32_t*)src6;
|
||||
uint32_t *s7 = (uint32_t*)src7;
|
||||
__m256i *d = (__m256i*)dst;
|
||||
|
||||
d[ 0] = _mm256_set_epi32( s7[0], s6[0], s5[0], s4[0],
|
||||
s3[0], s2[0], s1[0], s0[0] );
|
||||
d[ 1] = _mm256_set_epi32( s7[1], s6[1], s5[1], s4[1],
|
||||
s3[1], s2[1], s1[1], s0[1] );
|
||||
d[ 2] = _mm256_set_epi32( s7[2], s6[2], s5[2], s4[2],
|
||||
s3[2], s2[2], s1[2], s0[2] );
|
||||
d[ 3] = _mm256_set_epi32( s7[3], s6[3], s5[3], s4[3],
|
||||
s3[3], s2[3], s1[3], s0[3] );
|
||||
d[ 4] = _mm256_set_epi32( s7[4], s6[4], s5[4], s4[4],
|
||||
s3[4], s2[4], s1[4], s0[4] );
|
||||
d[ 5] = _mm256_set_epi32( s7[5], s6[5], s5[5], s4[5],
|
||||
s3[5], s2[5], s1[5], s0[5] );
|
||||
d[ 6] = _mm256_set_epi32( s7[6], s6[6], s5[6], s4[6],
|
||||
s3[6], s2[6], s1[6], s0[6] );
|
||||
d[ 7] = _mm256_set_epi32( s7[7], s6[7], s5[7], s4[7],
|
||||
s3[7], s2[7], s1[7], s0[7] );
|
||||
|
||||
if ( bit_len <= 256 ) return;
|
||||
|
||||
d[ 8] = _mm256_set_epi32( s7[ 8], s6[ 8], s5[ 8], s4[ 8],
|
||||
s3[ 8], s2[ 8], s1[ 8], s0[ 8] );
|
||||
d[ 9] = _mm256_set_epi32( s7[ 9], s6[ 9], s5[ 9], s4[ 9],
|
||||
s3[ 9], s2[ 9], s1[ 9], s0[ 9] );
|
||||
d[10] = _mm256_set_epi32( s7[10], s6[10], s5[10], s4[10],
|
||||
s3[10], s2[10], s1[10], s0[10] );
|
||||
d[11] = _mm256_set_epi32( s7[11], s6[11], s5[11], s4[11],
|
||||
s3[11], s2[11], s1[11], s0[11] );
|
||||
d[12] = _mm256_set_epi32( s7[12], s6[12], s5[12], s4[12],
|
||||
s3[12], s2[12], s1[12], s0[12] );
|
||||
d[13] = _mm256_set_epi32( s7[13], s6[13], s5[13], s4[13],
|
||||
s3[13], s2[13], s1[13], s0[13] );
|
||||
d[14] = _mm256_set_epi32( s7[14], s6[14], s5[14], s4[14],
|
||||
s3[14], s2[14], s1[14], s0[14] );
|
||||
d[15] = _mm256_set_epi32( s7[15], s6[15], s5[15], s4[15],
|
||||
s3[15], s2[15], s1[15], s0[15] );
|
||||
|
||||
if ( bit_len <= 512 ) return;
|
||||
|
||||
d[16] = _mm256_set_epi32( s7[16], s6[16], s5[16], s4[16],
|
||||
s3[16], s2[16], s1[16], s0[16] );
|
||||
d[17] = _mm256_set_epi32( s7[17], s6[17], s5[17], s4[17],
|
||||
s3[17], s2[17], s1[17], s0[17] );
|
||||
d[18] = _mm256_set_epi32( s7[18], s6[18], s5[18], s4[18],
|
||||
s3[18], s2[18], s1[18], s0[18] );
|
||||
d[19] = _mm256_set_epi32( s7[19], s6[19], s5[19], s4[19],
|
||||
s3[19], s2[19], s1[19], s0[19] );
|
||||
|
||||
if ( bit_len <= 640 ) return;
|
||||
|
||||
d[20] = _mm256_set_epi32( s7[20], s6[20], s5[20], s4[20],
|
||||
s3[20], s2[20], s1[20], s0[20] );
|
||||
d[21] = _mm256_set_epi32( s7[21], s6[21], s5[21], s4[21],
|
||||
s3[21], s2[21], s1[21], s0[21] );
|
||||
d[22] = _mm256_set_epi32( s7[22], s6[22], s5[22], s4[22],
|
||||
s3[22], s2[22], s1[22], s0[22] );
|
||||
d[23] = _mm256_set_epi32( s7[23], s6[23], s5[23], s4[23],
|
||||
s3[23], s2[23], s1[23], s0[23] );
|
||||
|
||||
if ( bit_len <= 768 ) return;
|
||||
|
||||
d[24] = _mm256_set_epi32( s7[24], s6[24], s5[24], s4[24],
|
||||
s3[24], s2[24], s1[24], s0[24] );
|
||||
d[25] = _mm256_set_epi32( s7[25], s6[25], s5[25], s4[25],
|
||||
s3[25], s2[25], s1[25], s0[25] );
|
||||
d[26] = _mm256_set_epi32( s7[26], s6[26], s5[26], s4[26],
|
||||
s3[26], s2[26], s1[26], s0[26] );
|
||||
d[27] = _mm256_set_epi32( s7[27], s6[27], s5[27], s4[27],
|
||||
s3[27], s2[27], s1[27], s0[27] );
|
||||
d[28] = _mm256_set_epi32( s7[28], s6[28], s5[28], s4[28],
|
||||
s3[28], s2[28], s1[28], s0[28] );
|
||||
d[29] = _mm256_set_epi32( s7[29], s6[29], s5[29], s4[29],
|
||||
s3[29], s2[29], s1[29], s0[29] );
|
||||
d[30] = _mm256_set_epi32( s7[30], s6[30], s5[30], s4[30],
|
||||
s3[30], s2[30], s1[30], s0[30] );
|
||||
d[31] = _mm256_set_epi32( s7[31], s6[31], s5[31], s4[31],
|
||||
s3[31], s2[31], s1[31], s0[31] );
|
||||
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// Slower but it works with 32 bit data
|
||||
// bit_len must be multiple of 32
|
||||
static inline void mm256_interleave_8x32x( uint32_t *dst, uint32_t *src0,
|
||||
uint32_t *src1, uint32_t *src2, uint32_t *src3, uint32_t *src4,
|
||||
uint32_t *src5, uint32_t *src6, uint32_t *src7, int bit_len )
|
||||
{
|
||||
uint32_t *d = dst;;
|
||||
for ( int i = 0; i < bit_len>>5; i++, d += 8 )
|
||||
{
|
||||
*d = *(src0+i);
|
||||
*(d+1) = *(src1+i);
|
||||
*(d+2) = *(src2+i);
|
||||
*(d+3) = *(src3+i);
|
||||
*(d+4) = *(src4+i);
|
||||
*(d+5) = *(src5+i);
|
||||
*(d+6) = *(src6+i);
|
||||
*(d+7) = *(src7+i);
|
||||
}
|
||||
}
|
||||
|
||||
// Deinterleave 8 buffers of 32 bit data from the source buffer.
|
||||
static inline void mm256_deinterleave_8x32( void *dst0, void *dst1, void *dst2,
|
||||
void *dst3, void *dst4, void *dst5, void *dst6, void *dst7,
|
||||
const void *src, int bit_len )
|
||||
{
|
||||
uint32_t *s = (uint32_t*)src;
|
||||
__m256i* d0 = (__m256i*)dst0;
|
||||
__m256i* d1 = (__m256i*)dst1;
|
||||
__m256i* d2 = (__m256i*)dst2;
|
||||
__m256i* d3 = (__m256i*)dst3;
|
||||
__m256i* d4 = (__m256i*)dst4;
|
||||
__m256i* d5 = (__m256i*)dst5;
|
||||
__m256i* d6 = (__m256i*)dst6;
|
||||
__m256i* d7 = (__m256i*)dst7;
|
||||
|
||||
d0[0] = _mm256_set_epi32( s[ 56], s[ 48], s[ 40], s[ 32],
|
||||
s[ 24], s[ 16], s[ 8], s[ 0] );
|
||||
d1[0] = _mm256_set_epi32( s[ 57], s[ 49], s[ 41], s[ 33],
|
||||
s[ 25], s[ 17], s[ 9], s[ 1] );
|
||||
d2[0] = _mm256_set_epi32( s[ 58], s[ 50], s[ 42], s[ 34],
|
||||
s[ 26], s[ 18], s[ 10], s[ 2] );
|
||||
d3[0] = _mm256_set_epi32( s[ 59], s[ 51], s[ 43], s[ 35],
|
||||
s[ 27], s[ 19], s[ 11], s[ 3] );
|
||||
d4[0] = _mm256_set_epi32( s[ 60], s[ 52], s[ 44], s[ 36],
|
||||
s[ 28], s[ 20], s[ 12], s[ 4] );
|
||||
d5[0] = _mm256_set_epi32( s[ 61], s[ 53], s[ 45], s[ 37],
|
||||
s[ 29], s[ 21], s[ 13], s[ 5] );
|
||||
d6[0] = _mm256_set_epi32( s[ 62], s[ 54], s[ 46], s[ 38],
|
||||
s[ 30], s[ 22], s[ 14], s[ 6] );
|
||||
d7[0] = _mm256_set_epi32( s[ 63], s[ 55], s[ 47], s[ 39],
|
||||
s[ 31], s[ 23], s[ 15], s[ 7] );
|
||||
|
||||
if ( bit_len <= 256 ) return;
|
||||
|
||||
d0[1] = _mm256_set_epi32( s[120], s[112], s[104], s[ 96],
|
||||
s[ 88], s[ 80], s[ 72], s[ 64] );
|
||||
d1[1] = _mm256_set_epi32( s[121], s[113], s[105], s[ 97],
|
||||
s[ 89], s[ 81], s[ 73], s[ 65] );
|
||||
d2[1] = _mm256_set_epi32( s[122], s[114], s[106], s[ 98],
|
||||
s[ 90], s[ 82], s[ 74], s[ 66]);
|
||||
d3[1] = _mm256_set_epi32( s[123], s[115], s[107], s[ 99],
|
||||
s[ 91], s[ 83], s[ 75], s[ 67] );
|
||||
d4[1] = _mm256_set_epi32( s[124], s[116], s[108], s[100],
|
||||
s[ 92], s[ 84], s[ 76], s[ 68] );
|
||||
d5[1] = _mm256_set_epi32( s[125], s[117], s[109], s[101],
|
||||
s[ 93], s[ 85], s[ 77], s[ 69] );
|
||||
d6[1] = _mm256_set_epi32( s[126], s[118], s[110], s[102],
|
||||
s[ 94], s[ 86], s[ 78], s[ 70] );
|
||||
d7[1] = _mm256_set_epi32( s[127], s[119], s[111], s[103],
|
||||
s[ 95], s[ 87], s[ 79], s[ 71] );
|
||||
|
||||
if ( bit_len <= 512 ) return;
|
||||
|
||||
// null change for overrun space, vector indexing doesn't work for
|
||||
// 32 bit data
|
||||
if ( bit_len <= 640 )
|
||||
{
|
||||
uint32_t *d = ((uint32_t*)d0) + 8;
|
||||
d0[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[152], s[144], s[136], s[128] );
|
||||
d = ((uint32_t*)d1) + 8;
|
||||
d1[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[153], s[145], s[137], s[129] );
|
||||
d = ((uint32_t*)d2) + 8;
|
||||
d2[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[154], s[146], s[138], s[130]);
|
||||
d = ((uint32_t*)d3) + 8;
|
||||
d3[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[155], s[147], s[139], s[131] );
|
||||
d = ((uint32_t*)d4) + 8;
|
||||
d4[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[156], s[148], s[140], s[132] );
|
||||
d = ((uint32_t*)d5) + 8;
|
||||
d5[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[157], s[149], s[141], s[133] );
|
||||
d = ((uint32_t*)d6) + 8;
|
||||
d6[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[158], s[150], s[142], s[134] );
|
||||
d = ((uint32_t*)d7) + 8;
|
||||
d7[2] = _mm256_set_epi32( *(d+7), *(d+6), *(d+5), *(d+4),
|
||||
s[159], s[151], s[143], s[135] );
|
||||
return;
|
||||
}
|
||||
|
||||
d0[2] = _mm256_set_epi32( s[184], s[176], s[168], s[160],
|
||||
s[152], s[144], s[136], s[128] );
|
||||
d1[2] = _mm256_set_epi32( s[185], s[177], s[169], s[161],
|
||||
s[153], s[145], s[137], s[129] );
|
||||
d2[2] = _mm256_set_epi32( s[186], s[178], s[170], s[162],
|
||||
s[154], s[146], s[138], s[130] );
|
||||
d3[2] = _mm256_set_epi32( s[187], s[179], s[171], s[163],
|
||||
s[155], s[147], s[139], s[131] );
|
||||
d4[2] = _mm256_set_epi32( s[188], s[180], s[172], s[164],
|
||||
s[156], s[148], s[140], s[132] );
|
||||
d5[2] = _mm256_set_epi32( s[189], s[181], s[173], s[165],
|
||||
s[157], s[149], s[141], s[133] );
|
||||
d6[2] = _mm256_set_epi32( s[190], s[182], s[174], s[166],
|
||||
s[158], s[150], s[142], s[134] );
|
||||
d7[2] = _mm256_set_epi32( s[191], s[183], s[175], s[167],
|
||||
s[159], s[151], s[143], s[135] );
|
||||
|
||||
if ( bit_len <= 768 ) return;
|
||||
|
||||
d0[3] = _mm256_set_epi32( s[248], s[240], s[232], s[224],
|
||||
s[216], s[208], s[200], s[192] );
|
||||
d1[3] = _mm256_set_epi32( s[249], s[241], s[233], s[225],
|
||||
s[217], s[209], s[201], s[193] );
|
||||
d2[3] = _mm256_set_epi32( s[250], s[242], s[234], s[226],
|
||||
s[218], s[210], s[202], s[194] );
|
||||
d3[3] = _mm256_set_epi32( s[251], s[243], s[235], s[227],
|
||||
s[219], s[211], s[203], s[195] );
|
||||
d4[3] = _mm256_set_epi32( s[252], s[244], s[236], s[228],
|
||||
s[220], s[212], s[204], s[196] );
|
||||
d5[3] = _mm256_set_epi32( s[253], s[245], s[237], s[229],
|
||||
s[221], s[213], s[205], s[197] );
|
||||
d6[3] = _mm256_set_epi32( s[254], s[246], s[238], s[230],
|
||||
s[222], s[214], s[206], s[198] );
|
||||
d7[3] = _mm256_set_epi32( s[255], s[247], s[239], s[231],
|
||||
s[223], s[215], s[207], s[199] );
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// Deinterleave 8 arrays into indivdual buffers for scalar processing
|
||||
// bit_len must be multiple of 32
|
||||
static inline void mm256_deinterleave_8x32x( uint32_t *dst0, uint32_t *dst1,
|
||||
uint32_t *dst2,uint32_t *dst3, uint32_t *dst4, uint32_t *dst5,
|
||||
uint32_t *dst6,uint32_t *dst7,uint32_t *src, int bit_len )
|
||||
{
|
||||
uint32_t *s = src;
|
||||
for ( int i = 0; i < bit_len>>5; i++, s += 8 )
|
||||
{
|
||||
*(dst0+i) = *( s );
|
||||
*(dst1+i) = *( s + 1 );
|
||||
*(dst2+i) = *( s + 2 );
|
||||
*(dst3+i) = *( s + 3 );
|
||||
*(dst4+i) = *( s + 4 );
|
||||
*(dst5+i) = *( s + 5 );
|
||||
*(dst6+i) = *( s + 6 );
|
||||
*(dst7+i) = *( s + 7 );
|
||||
}
|
||||
}
|
||||
|
||||
// Convert from 4x32 AVX interleaving to 4x64 AVX2.
|
||||
// Can't do it in place
|
||||
static inline void mm256_reinterleave_4x64( void *dst, void *src, int bit_len )
|
||||
{
|
||||
__m256i* d = (__m256i*)dst;
|
||||
uint32_t *s = (uint32_t*)src;
|
||||
|
||||
d[0] = _mm256_set_epi32( s[7], s[3], s[6], s[2], s[5], s[1], s[4], s[0] );
|
||||
d[1] = _mm256_set_epi32( s[15],s[11],s[14],s[10],s[13],s[9],s[12], s[8] );
|
||||
d[2] = _mm256_set_epi32( s[23],s[19],s[22],s[18],s[21],s[17],s[20],s[16] );
|
||||
d[3] = _mm256_set_epi32( s[31],s[27],s[30],s[26],s[29],s[25],s[28],s[24] );
|
||||
|
||||
if ( bit_len <= 256 ) return;
|
||||
|
||||
d[4] = _mm256_set_epi32( s[39],s[35],s[38],s[34],s[37],s[33],s[36],s[32] );
|
||||
d[5] = _mm256_set_epi32( s[47],s[43],s[46],s[42],s[45],s[41],s[44],s[40] );
|
||||
d[6] = _mm256_set_epi32( s[55],s[51],s[54],s[50],s[53],s[49],s[52],s[48] );
|
||||
d[7] = _mm256_set_epi32( s[63],s[59],s[62],s[58],s[61],s[57],s[60],s[56] );
|
||||
|
||||
if ( bit_len <= 512 ) return;
|
||||
|
||||
d[8] = _mm256_set_epi32( s[71],s[67],s[70],s[66],s[69],s[65],s[68],s[64] );
|
||||
d[9] = _mm256_set_epi32( s[79],s[75],s[78],s[74],s[77],s[73],s[76],s[72] );
|
||||
|
||||
if ( bit_len <= 640 ) return;
|
||||
|
||||
d[10] = _mm256_set_epi32(s[87],s[83],s[86],s[82],s[85],s[81],s[84],s[80]);
|
||||
d[11] = _mm256_set_epi32(s[95],s[91],s[94],s[90],s[93],s[89],s[92],s[88]);
|
||||
|
||||
d[12] = _mm256_set_epi32(s[103],s[99],s[102],s[98],s[101],s[97],s[100],s[96]);
|
||||
d[13] = _mm256_set_epi32(s[111],s[107],s[110],s[106],s[109],s[105],s[108],s[104]);
|
||||
d[14] = _mm256_set_epi32(s[119],s[115],s[118],s[114],s[117],s[113],s[116],s[112]);
|
||||
d[15] = _mm256_set_epi32(s[127],s[123],s[126],s[122],s[125],s[121],s[124],s[120]);
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// likely of no use.
|
||||
// convert 4x32 byte (128 bit) vectors to 4x64 (256 bit) vectors for AVX2
|
||||
// bit_len must be multiple of 64
|
||||
// broken
|
||||
static inline void mm256_reinterleave_4x64x( uint64_t *dst, uint32_t *src,
|
||||
int bit_len )
|
||||
{
|
||||
uint32_t *d = (uint32_t*)dst;
|
||||
uint32_t *s = (uint32_t*)src;
|
||||
for ( int i = 0; i < bit_len >> 5; i += 8 )
|
||||
{
|
||||
*( d + i ) = *( s + i ); // 0 <- 0 8 <- 8
|
||||
*( d + i + 1 ) = *( s + i + 4 ); // 1 <- 4 9 <- 12
|
||||
*( d + i + 2 ) = *( s + i + 1 ); // 2 <- 1 10 <- 9
|
||||
*( d + i + 3 ) = *( s + i + 5 ); // 3 <- 5 11 <- 13
|
||||
*( d + i + 4 ) = *( s + i + 2 ); // 4 <- 2 12 <- 10
|
||||
*( d + i + 5 ) = *( s + i + 6 ); // 5 <- 6 13 <- 14
|
||||
*( d + i + 6 ) = *( s + i + 3 ); // 6 <- 3 14 <- 11
|
||||
*( d + i + 7 ) = *( s + i + 7 ); // 7 <- 7 15 <- 15
|
||||
}
|
||||
}
|
||||
|
||||
// Convert 4x64 byte (256 bit) vectors to 4x32 (128 bit) vectors for AVX
|
||||
// bit_len must be multiple of 64
|
||||
static inline void mm256_reinterleave_4x32( void *dst, void *src, int bit_len )
|
||||
{
|
||||
__m256i *d = (__m256i*)dst;
|
||||
uint32_t *s = (uint32_t*)src;
|
||||
|
||||
d[0] = _mm256_set_epi32( s[ 7],s[ 5],s[ 3],s[ 1],s[ 6],s[ 4],s[ 2],s[ 0] );
|
||||
d[1] = _mm256_set_epi32( s[15],s[13],s[11],s[ 9],s[14],s[12],s[10],s[ 8] );
|
||||
d[2] = _mm256_set_epi32( s[23],s[21],s[19],s[17],s[22],s[20],s[18],s[16] );
|
||||
d[3] = _mm256_set_epi32( s[31],s[29],s[27],s[25],s[30],s[28],s[26],s[24] );
|
||||
|
||||
if ( bit_len <= 256 ) return;
|
||||
|
||||
d[4] = _mm256_set_epi32( s[39],s[37],s[35],s[33],s[38],s[36],s[34],s[32] );
|
||||
d[5] = _mm256_set_epi32( s[47],s[45],s[43],s[41],s[46],s[44],s[42],s[40] );
|
||||
d[6] = _mm256_set_epi32( s[55],s[53],s[51],s[49],s[54],s[52],s[50],s[48] );
|
||||
d[7] = _mm256_set_epi32( s[63],s[61],s[59],s[57],s[62],s[60],s[58],s[56] );
|
||||
|
||||
if ( bit_len <= 512 ) return;
|
||||
|
||||
d[8] = _mm256_set_epi32( s[71],s[69],s[67],s[65],s[70],s[68],s[66],s[64] );
|
||||
d[9] = _mm256_set_epi32( s[79],s[77],s[75],s[73],s[78],s[76],s[74],s[72] );
|
||||
|
||||
if ( bit_len <= 640 ) return;
|
||||
|
||||
d[10] = _mm256_set_epi32( s[87],s[85],s[83],s[81],s[86],s[84],s[82],s[80] );
|
||||
d[11] = _mm256_set_epi32( s[95],s[93],s[91],s[89],s[94],s[92],s[90],s[88] );
|
||||
|
||||
d[12] = _mm256_set_epi32( s[103],s[101],s[99],s[97],s[102],s[100],s[98],s[96] );
|
||||
d[13] = _mm256_set_epi32( s[111],s[109],s[107],s[105],s[110],s[108],s[106],s[104] );
|
||||
d[14] = _mm256_set_epi32( s[119],s[117],s[115],s[113],s[118],s[116],s[114],s[112] );
|
||||
d[15] = _mm256_set_epi32( s[127],s[125],s[123],s[121],s[126],s[124],s[122],s[120] );
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
static inline void mm256_interleave_2x128( void *dst, void *src0, void *src1,
|
||||
int bit_len )
|
||||
{
|
||||
__m256i *d = (__m256i*)dst;
|
||||
uint64_t *s0 = (uint64_t*)src0;
|
||||
uint64_t *s1 = (uint64_t*)src1;
|
||||
|
||||
d[0] = _mm256_set_epi64x( s1[ 1], s1[ 0], s0[ 1], s0[ 0] );
|
||||
d[1] = _mm256_set_epi64x( s1[ 3], s1[ 2], s0[ 3], s0[ 2] );
|
||||
|
||||
if ( bit_len <= 256 ) return;
|
||||
|
||||
d[2] = _mm256_set_epi64x( s1[ 5], s1[ 4], s0[ 5], s0[ 4] );
|
||||
d[3] = _mm256_set_epi64x( s1[ 7], s1[ 6], s0[ 7], s0[ 6] );
|
||||
|
||||
if ( bit_len <= 512 ) return;
|
||||
|
||||
d[4] = _mm256_set_epi64x( s1[ 9], s1[ 8], s0[ 9], s0[ 8] );
|
||||
|
||||
if ( bit_len <= 640 ) return;
|
||||
|
||||
d[5] = _mm256_set_epi64x( s1[11], s1[10], s0[11], s0[10] );
|
||||
|
||||
d[6] = _mm256_set_epi64x( s1[13], s1[12], s0[13], s0[12] );
|
||||
d[7] = _mm256_set_epi64x( s1[15], s1[14], s0[15], s0[14] );
|
||||
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
static inline void mm256_deinterleave_2x128( void *dst0, void *dst1, void *src,
|
||||
int bit_len )
|
||||
{
|
||||
uint64_t *s = (uint64_t*)src;
|
||||
__m256i *d0 = (__m256i*)dst0;
|
||||
__m256i *d1 = (__m256i*)dst1;
|
||||
|
||||
d0[0] = _mm256_set_epi64x( s[ 5], s[4], s[ 1], s[ 0] );
|
||||
d1[0] = _mm256_set_epi64x( s[ 7], s[6], s[ 3], s[ 2] );
|
||||
|
||||
if ( bit_len <= 256 ) return;
|
||||
|
||||
d0[1] = _mm256_set_epi64x( s[13], s[12], s[ 9], s[ 8] );
|
||||
d1[1] = _mm256_set_epi64x( s[15], s[14], s[11], s[10] );
|
||||
|
||||
if ( bit_len <= 512 ) return;
|
||||
|
||||
if ( bit_len <= 640 )
|
||||
{
|
||||
d0[2] = _mm256_set_epi64x( d0[2][3], d0[2][2], s[17], s[16] );
|
||||
d1[2] = _mm256_set_epi64x( d1[2][3], d1[2][2], s[19], s[18] );
|
||||
return;
|
||||
}
|
||||
|
||||
d0[2] = _mm256_set_epi64x( s[21], s[20], s[17], s[16] );
|
||||
d1[2] = _mm256_set_epi64x( s[23], s[22], s[19], s[18] );
|
||||
|
||||
d0[3] = _mm256_set_epi64x( s[29], s[28], s[25], s[24] );
|
||||
d1[3] = _mm256_set_epi64x( s[31], s[30], s[27], s[26] );
|
||||
|
||||
// bit_len == 1024
|
||||
}
|
||||
|
||||
// not used
|
||||
static inline void mm_reinterleave_4x32( void *dst, void *src, int bit_len )
|
||||
{
|
||||
uint32_t *d = (uint32_t*)dst;
|
||||
uint32_t *s = (uint32_t*)src;
|
||||
for ( int i = 0; i < bit_len >> 5; i +=8 )
|
||||
{
|
||||
*( d + i ) = *( s + i );
|
||||
*( d + i + 1 ) = *( s + i + 2 );
|
||||
*( d + i + 2 ) = *( s + i + 4 );
|
||||
*( d + i + 3 ) = *( s + i + 6 );
|
||||
*( d + i + 4 ) = *( s + i + 1 );
|
||||
*( d + i + 5 ) = *( s + i + 3 );
|
||||
*( d + i + 6 ) = *( s + i + 5 );
|
||||
*( d + i + 7 ) = *( s + i + 7 );
|
||||
}
|
||||
}
|
||||
|
||||
#endif // __AVX2__
|
||||
#endif // AVXDEFS_H__
|
||||
#endif // AVXDEFS_H__
|
||||
|
@@ -3,10 +3,30 @@
|
||||
make distclean || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
CFLAGS="-O3 -march=core-avx2 -msha -Wall" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-avx2-sha.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-avx2-sha
|
||||
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
CFLAGS="-O3 -march=skylake-avx512 -Wall" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-avx512.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-avx512
|
||||
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
CFLAGS="-O3 -march=core-avx2 -Wall" ./configure --with-curl
|
||||
make -j 4
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-aes-avx2.exe
|
||||
mv cpuminer.exe cpuminer-avx2.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-avx2
|
||||
|
||||
|
47
configure
vendored
47
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.6.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.8.8.1.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.8.6'
|
||||
PACKAGE_STRING='cpuminer-opt 3.8.6'
|
||||
PACKAGE_VERSION='3.8.8.1'
|
||||
PACKAGE_STRING='cpuminer-opt 3.8.8.1'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -738,6 +738,7 @@ infodir
|
||||
docdir
|
||||
oldincludedir
|
||||
includedir
|
||||
runstatedir
|
||||
localstatedir
|
||||
sharedstatedir
|
||||
sysconfdir
|
||||
@@ -819,6 +820,7 @@ datadir='${datarootdir}'
|
||||
sysconfdir='${prefix}/etc'
|
||||
sharedstatedir='${prefix}/com'
|
||||
localstatedir='${prefix}/var'
|
||||
runstatedir='${localstatedir}/run'
|
||||
includedir='${prefix}/include'
|
||||
oldincludedir='/usr/include'
|
||||
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
|
||||
@@ -1071,6 +1073,15 @@ do
|
||||
| -silent | --silent | --silen | --sile | --sil)
|
||||
silent=yes ;;
|
||||
|
||||
-runstatedir | --runstatedir | --runstatedi | --runstated \
|
||||
| --runstate | --runstat | --runsta | --runst | --runs \
|
||||
| --run | --ru | --r)
|
||||
ac_prev=runstatedir ;;
|
||||
-runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
|
||||
| --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
|
||||
| --run=* | --ru=* | --r=*)
|
||||
runstatedir=$ac_optarg ;;
|
||||
|
||||
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
|
||||
ac_prev=sbindir ;;
|
||||
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
|
||||
@@ -1208,7 +1219,7 @@ fi
|
||||
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
|
||||
datadir sysconfdir sharedstatedir localstatedir includedir \
|
||||
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
|
||||
libdir localedir mandir
|
||||
libdir localedir mandir runstatedir
|
||||
do
|
||||
eval ac_val=\$$ac_var
|
||||
# Remove trailing slashes.
|
||||
@@ -1321,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.8.6 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.8.8.1 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1361,6 +1372,7 @@ Fine tuning of the installation directories:
|
||||
--sysconfdir=DIR read-only single-machine data [PREFIX/etc]
|
||||
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
|
||||
--localstatedir=DIR modifiable single-machine data [PREFIX/var]
|
||||
--runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
|
||||
--libdir=DIR object code libraries [EPREFIX/lib]
|
||||
--includedir=DIR C header files [PREFIX/include]
|
||||
--oldincludedir=DIR C header files for non-gcc [/usr/include]
|
||||
@@ -1392,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.8.6:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.8.8.1:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1497,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.8.6
|
||||
cpuminer-opt configure 3.8.8.1
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2000,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.8.6, which was
|
||||
It was created by cpuminer-opt $as_me 3.8.8.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2495,7 +2507,7 @@ ac_config_headers="$ac_config_headers cpuminer-config.h"
|
||||
|
||||
|
||||
|
||||
am__api_version='1.14'
|
||||
am__api_version='1.15'
|
||||
|
||||
# Find a good install program. We prefer a C program (faster),
|
||||
# so one script is as good as another. But avoid the broken or
|
||||
@@ -2667,8 +2679,8 @@ test "$program_suffix" != NONE &&
|
||||
ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
|
||||
program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
|
||||
|
||||
# expand $ac_aux_dir to an absolute path
|
||||
am_aux_dir=`cd $ac_aux_dir && pwd`
|
||||
# Expand $ac_aux_dir to an absolute path.
|
||||
am_aux_dir=`cd "$ac_aux_dir" && pwd`
|
||||
|
||||
if test x"${MISSING+set}" != xset; then
|
||||
case $am_aux_dir in
|
||||
@@ -2687,7 +2699,7 @@ else
|
||||
$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
|
||||
fi
|
||||
|
||||
if test x"${install_sh}" != xset; then
|
||||
if test x"${install_sh+set}" != xset; then
|
||||
case $am_aux_dir in
|
||||
*\ * | *\ *)
|
||||
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
|
||||
@@ -2981,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.8.6'
|
||||
VERSION='3.8.8.1'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -3015,8 +3027,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
|
||||
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
|
||||
mkdir_p='$(MKDIR_P)'
|
||||
|
||||
# We need awk for the "check" target. The system "awk" is bad on
|
||||
# some platforms.
|
||||
# We need awk for the "check" target (and possibly the TAP driver). The
|
||||
# system "awk" is bad on some platforms.
|
||||
# Always define AMTAR for backward compatibility. Yes, it's still used
|
||||
# in the wild :-( We should find a proper way to deprecate it ...
|
||||
AMTAR='$${TAR-tar}'
|
||||
@@ -3075,6 +3087,7 @@ END
|
||||
fi
|
||||
|
||||
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5
|
||||
$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; }
|
||||
# Check whether --enable-maintainer-mode was given.
|
||||
@@ -6677,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.8.6, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.8.8.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6743,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.8.6
|
||||
cpuminer-opt config.status 3.8.8.1
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.8.6])
|
||||
AC_INIT([cpuminer-opt], [3.8.8.1])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
156
cpu-miner.c
156
cpu-miner.c
@@ -98,7 +98,7 @@ static int opt_fail_pause = 10;
|
||||
static int opt_time_limit = 0;
|
||||
int opt_timeout = 300;
|
||||
static int opt_scantime = 5;
|
||||
static const bool opt_time = true;
|
||||
//static const bool opt_time = true;
|
||||
enum algos opt_algo = ALGO_NULL;
|
||||
int opt_scrypt_n = 0;
|
||||
int opt_pluck_n = 128;
|
||||
@@ -1988,7 +1988,9 @@ static void *miner_thread( void *userdata )
|
||||
}
|
||||
}
|
||||
// Display benchmark total
|
||||
if ( opt_benchmark && thr_id == opt_n_threads - 1 )
|
||||
// Update hashrate for API if no shares accepted yet.
|
||||
if ( ( opt_benchmark || !accepted_count )
|
||||
&& thr_id == opt_n_threads - 1 )
|
||||
{
|
||||
double hashrate = 0.;
|
||||
double hashcount = 0.;
|
||||
@@ -1999,27 +2001,30 @@ static void *miner_thread( void *userdata )
|
||||
}
|
||||
if ( hashcount )
|
||||
{
|
||||
char hc[16];
|
||||
char hc_units[2] = {0,0};
|
||||
char hr[16];
|
||||
char hr_units[2] = {0,0};
|
||||
global_hashcount = hashcount;
|
||||
global_hashrate = hashrate;
|
||||
scale_hash_for_display( &hashcount, hc_units );
|
||||
scale_hash_for_display( &hashrate, hr_units );
|
||||
if ( hc_units[0] )
|
||||
sprintf( hc, "%.2f", hashcount );
|
||||
else // no fractions of a hash
|
||||
sprintf( hc, "%.0f", hashcount );
|
||||
sprintf( hr, "%.2f", hashrate );
|
||||
if ( opt_benchmark )
|
||||
{
|
||||
char hc[16];
|
||||
char hc_units[2] = {0,0};
|
||||
char hr[16];
|
||||
char hr_units[2] = {0,0};
|
||||
scale_hash_for_display( &hashcount, hc_units );
|
||||
scale_hash_for_display( &hashrate, hr_units );
|
||||
if ( hc_units[0] )
|
||||
sprintf( hc, "%.2f", hashcount );
|
||||
else // no fractions of a hash
|
||||
sprintf( hc, "%.0f", hashcount );
|
||||
sprintf( hr, "%.2f", hashrate );
|
||||
#if ((defined(_WIN64) || defined(__WINDOWS__)))
|
||||
applog( LOG_NOTICE, "Total: %s %sH, %s %sH/s",
|
||||
applog( LOG_NOTICE, "Total: %s %sH, %s %sH/s",
|
||||
hc, hc_units, hr, hr_units );
|
||||
#else
|
||||
applog( LOG_NOTICE, "Total: %s %sH, %s %sH/s, %dC",
|
||||
applog( LOG_NOTICE, "Total: %s %sH, %s %sH/s, %dC",
|
||||
hc, hc_units, hr, hr_units, (uint32_t)cpu_temp(0) );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // miner_thread loop
|
||||
|
||||
@@ -2999,45 +3004,37 @@ static void show_credits()
|
||||
bool check_cpu_capability ()
|
||||
{
|
||||
char cpu_brand[0x40];
|
||||
// there is no CPU related feature specific to 4way, just AVX2 and AES
|
||||
bool cpu_has_sse2 = has_sse2();
|
||||
bool cpu_has_aes = has_aes_ni();
|
||||
bool cpu_has_sse42 = has_sse42();
|
||||
bool cpu_has_avx = has_avx1();
|
||||
bool cpu_has_avx2 = has_avx2();
|
||||
bool cpu_has_sha = has_sha();
|
||||
// no need to check if sw has sse2,
|
||||
// the code won't compile without it.
|
||||
// bool sw_has_sse2 = false;
|
||||
bool sw_has_aes = false;
|
||||
bool sw_has_sse42 = false;
|
||||
bool sw_has_avx = false;
|
||||
bool sw_has_avx2 = false;
|
||||
bool sw_has_sha = false;
|
||||
// bool sw_has_4way = false;
|
||||
bool cpu_has_sse2 = has_sse2();
|
||||
bool cpu_has_aes = has_aes_ni();
|
||||
bool cpu_has_sse42 = has_sse42();
|
||||
bool cpu_has_avx = has_avx1();
|
||||
bool cpu_has_avx2 = has_avx2();
|
||||
bool cpu_has_sha = has_sha();
|
||||
bool cpu_has_avx512 = has_avx512f();
|
||||
bool sw_has_aes = false;
|
||||
bool sw_has_sse42 = false;
|
||||
bool sw_has_avx = false;
|
||||
bool sw_has_avx2 = false;
|
||||
bool sw_has_avx512 = false;
|
||||
bool sw_has_sha = false;
|
||||
set_t algo_features = algo_gate.optimizations;
|
||||
bool algo_has_sse2 = set_incl( SSE2_OPT, algo_features );
|
||||
bool algo_has_aes = set_incl( AES_OPT, algo_features );
|
||||
bool algo_has_sse42 = set_incl( SSE42_OPT, algo_features );
|
||||
bool algo_has_avx = set_incl( AVX_OPT, algo_features );
|
||||
bool algo_has_avx2 = set_incl( AVX2_OPT, algo_features );
|
||||
bool algo_has_sha = set_incl( SHA_OPT, algo_features );
|
||||
// bool algo_has_4way = set_incl( FOUR_WAY_OPT, algo_features );
|
||||
bool algo_has_sse2 = set_incl( SSE2_OPT, algo_features );
|
||||
bool algo_has_aes = set_incl( AES_OPT, algo_features );
|
||||
bool algo_has_sse42 = set_incl( SSE42_OPT, algo_features );
|
||||
bool algo_has_avx2 = set_incl( AVX2_OPT, algo_features );
|
||||
bool algo_has_avx512 = set_incl( AVX512_OPT, algo_features );
|
||||
bool algo_has_sha = set_incl( SHA_OPT, algo_features );
|
||||
bool use_aes;
|
||||
bool use_sse2;
|
||||
bool use_sse42;
|
||||
bool use_avx;
|
||||
bool use_avx2;
|
||||
bool use_avx512;
|
||||
bool use_sha;
|
||||
// bool use_4way;
|
||||
bool use_none;
|
||||
|
||||
#ifdef __AES__
|
||||
sw_has_aes = true;
|
||||
#endif
|
||||
// #ifdef __SSE2__
|
||||
// sw_has_sse2 = true;
|
||||
// #endif
|
||||
#ifdef __SSE4_2__
|
||||
sw_has_sse42 = true;
|
||||
#endif
|
||||
@@ -3047,12 +3044,12 @@ bool check_cpu_capability ()
|
||||
#ifdef __AVX2__
|
||||
sw_has_avx2 = true;
|
||||
#endif
|
||||
#ifdef __AVX512F__
|
||||
sw_has_avx512 = true;
|
||||
#endif
|
||||
#ifdef __SHA__
|
||||
sw_has_sha = true;
|
||||
#endif
|
||||
// #ifdef HASH_4WAY
|
||||
// sw_has_4way = true;
|
||||
// #endif
|
||||
|
||||
#if !((__AES__) || (__SSE2__))
|
||||
printf("Neither __AES__ nor __SSE2__ defined.\n");
|
||||
@@ -3072,33 +3069,33 @@ bool check_cpu_capability ()
|
||||
#endif
|
||||
|
||||
printf("CPU features:");
|
||||
if ( cpu_has_sse2 ) printf( " SSE2" );
|
||||
if ( cpu_has_aes ) printf( " AES" );
|
||||
if ( cpu_has_sse42 ) printf( " SSE4.2" );
|
||||
if ( cpu_has_avx ) printf( " AVX" );
|
||||
if ( cpu_has_avx2 ) printf( " AVX2" );
|
||||
if ( cpu_has_sha ) printf( " SHA" );
|
||||
if ( cpu_has_sse2 ) printf( " SSE2" );
|
||||
if ( cpu_has_aes ) printf( " AES" );
|
||||
if ( cpu_has_sse42 ) printf( " SSE4.2" );
|
||||
if ( cpu_has_avx ) printf( " AVX" );
|
||||
if ( cpu_has_avx2 ) printf( " AVX2" );
|
||||
if ( cpu_has_avx512 ) printf( " AVX512" );
|
||||
if ( cpu_has_sha ) printf( " SHA" );
|
||||
|
||||
printf(".\nSW features: SSE2");
|
||||
if ( sw_has_aes ) printf( " AES" );
|
||||
if ( sw_has_sse42 ) printf( " SSE4.2" );
|
||||
if ( sw_has_avx ) printf( " AVX" );
|
||||
if ( sw_has_avx2 ) printf( " AVX2" );
|
||||
// if ( sw_has_4way ) printf( " 4WAY" );
|
||||
if ( sw_has_sha ) printf( " SHA" );
|
||||
if ( sw_has_aes ) printf( " AES" );
|
||||
if ( sw_has_sse42 ) printf( " SSE4.2" );
|
||||
if ( sw_has_avx ) printf( " AVX" );
|
||||
if ( sw_has_avx2 ) printf( " AVX2" );
|
||||
if ( sw_has_avx512 ) printf( " AVX512" );
|
||||
if ( sw_has_sha ) printf( " SHA" );
|
||||
|
||||
|
||||
printf(".\nAlgo features:");
|
||||
if ( algo_features == EMPTY_SET ) printf( " None" );
|
||||
else
|
||||
{
|
||||
if ( algo_has_sse2 ) printf( " SSE2" );
|
||||
if ( algo_has_aes ) printf( " AES" );
|
||||
if ( algo_has_sse42 ) printf( " SSE4.2" );
|
||||
if ( algo_has_avx ) printf( " AVX" );
|
||||
if ( algo_has_avx2 ) printf( " AVX2" );
|
||||
// if ( algo_has_4way ) printf( " 4WAY" );
|
||||
if ( algo_has_sha ) printf( " SHA" );
|
||||
if ( algo_has_sse2 ) printf( " SSE2" );
|
||||
if ( algo_has_aes ) printf( " AES" );
|
||||
if ( algo_has_sse42 ) printf( " SSE4.2" );
|
||||
if ( algo_has_avx2 ) printf( " AVX2" );
|
||||
if ( algo_has_avx512 ) printf( " AVX512" );
|
||||
if ( algo_has_sha ) printf( " SHA" );
|
||||
}
|
||||
printf(".\n");
|
||||
|
||||
@@ -3118,11 +3115,6 @@ bool check_cpu_capability ()
|
||||
printf( "The SW build requires a CPU with SSE4.2!\n" );
|
||||
return false;
|
||||
}
|
||||
if ( sw_has_avx && !cpu_has_avx )
|
||||
{
|
||||
printf( "The SW build requires a CPU with AVX!\n" );
|
||||
return false;
|
||||
}
|
||||
if ( sw_has_aes && !cpu_has_aes )
|
||||
{
|
||||
printf( "The SW build requires a CPU with AES!\n" );
|
||||
@@ -3135,13 +3127,13 @@ bool check_cpu_capability ()
|
||||
}
|
||||
|
||||
// Determine mining options
|
||||
use_sse2 = cpu_has_sse2 && algo_has_sse2;
|
||||
use_aes = cpu_has_aes && sw_has_aes && algo_has_aes;
|
||||
use_sse2 = cpu_has_sse2 && algo_has_sse2;
|
||||
use_aes = cpu_has_aes && sw_has_aes && algo_has_aes;
|
||||
use_sse42 = cpu_has_sse42 && sw_has_sse42 && algo_has_sse42;
|
||||
use_avx = cpu_has_avx && sw_has_avx && algo_has_avx;
|
||||
use_avx2 = cpu_has_avx2 && sw_has_avx2 && algo_has_avx2;
|
||||
use_sha = cpu_has_sha && sw_has_sha && algo_has_sha;
|
||||
use_none = !( use_sse2 || use_aes || use_sse42 || use_avx || use_avx2 ||
|
||||
use_avx2 = cpu_has_avx2 && sw_has_avx2 && algo_has_avx2;
|
||||
use_avx512 = cpu_has_avx512 && sw_has_avx512 && algo_has_avx512;
|
||||
use_sha = cpu_has_sha && sw_has_sha && algo_has_sha;
|
||||
use_none = !( use_sse2 || use_aes || use_sse42 || use_avx512 || use_avx2 ||
|
||||
use_sha );
|
||||
|
||||
// Display best options
|
||||
@@ -3149,12 +3141,12 @@ bool check_cpu_capability ()
|
||||
if ( use_none ) printf( " no optimizations" );
|
||||
else
|
||||
{
|
||||
if ( use_aes ) printf( " AES" );
|
||||
if ( use_avx2 ) printf( " AVX2" );
|
||||
else if ( use_avx ) printf( " AVX" );
|
||||
if ( use_aes ) printf( " AES" );
|
||||
if ( use_avx512 ) printf( " AVX512" );
|
||||
else if ( use_avx2 ) printf( " AVX2" );
|
||||
else if ( use_sse42 ) printf( " SSE4.2" );
|
||||
else if ( use_sse2 ) printf( " SSE2" );
|
||||
if ( use_sha ) printf( " SHA" );
|
||||
else if ( use_sse2 ) printf( " SSE2" );
|
||||
if ( use_sha ) printf( " SHA" );
|
||||
}
|
||||
printf( ".\n\n" );
|
||||
|
||||
|
1370
interleave.h
Normal file
1370
interleave.h
Normal file
File diff suppressed because it is too large
Load Diff
155
miner.h
155
miner.h
@@ -333,6 +333,7 @@ bool has_sha();
|
||||
bool has_aes_ni();
|
||||
bool has_avx1();
|
||||
bool has_avx2();
|
||||
bool has_avx512f();
|
||||
bool has_sse2();
|
||||
bool has_xop();
|
||||
bool has_fma3();
|
||||
@@ -485,8 +486,9 @@ enum algos {
|
||||
ALGO_ALLIUM,
|
||||
ALGO_ANIME,
|
||||
ALGO_ARGON2,
|
||||
ALGO_ARGON2DCRDS,
|
||||
ALGO_ARGON2DDYN,
|
||||
ALGO_ARGON2D250,
|
||||
ALGO_ARGON2D500,
|
||||
ALGO_ARGON2D4096,
|
||||
ALGO_AXIOM,
|
||||
ALGO_BASTION,
|
||||
ALGO_BLAKE,
|
||||
@@ -496,7 +498,8 @@ enum algos {
|
||||
ALGO_BMW,
|
||||
ALGO_C11,
|
||||
ALGO_CRYPTOLIGHT,
|
||||
ALGO_CRYPTONIGHT,
|
||||
ALGO_CRYPTONIGHT,
|
||||
ALGO_CRYPTONIGHTV7,
|
||||
ALGO_DECRED,
|
||||
ALGO_DEEP,
|
||||
ALGO_DMD_GR,
|
||||
@@ -565,8 +568,9 @@ static const char* const algo_names[] = {
|
||||
"allium",
|
||||
"anime",
|
||||
"argon2",
|
||||
"argon2d-crds",
|
||||
"argon2d-dyn",
|
||||
"argon2d250",
|
||||
"argon2d500",
|
||||
"argon2d4096",
|
||||
"axiom",
|
||||
"bastion",
|
||||
"blake",
|
||||
@@ -577,6 +581,7 @@ static const char* const algo_names[] = {
|
||||
"c11",
|
||||
"cryptolight",
|
||||
"cryptonight",
|
||||
"cryptonightv7",
|
||||
"decred",
|
||||
"deep",
|
||||
"dmd-gr",
|
||||
@@ -701,82 +706,84 @@ static char const usage[] = "\
|
||||
Usage: " PACKAGE_NAME " [OPTIONS]\n\
|
||||
Options:\n\
|
||||
-a, --algo=ALGO specify the algorithm to use\n\
|
||||
allium Garlicoin (GRLC)\n\
|
||||
anime Animecoin (ANI)\n\
|
||||
argon2 Argon2 Coin (AR2)\n\
|
||||
argon2d-crds Credits (CRDS)\n\
|
||||
argon2d-dyn Dynamic (DYN)\n\
|
||||
axiom Shabal-256 MemoHash\n\
|
||||
allium Garlicoin (GRLC)\n\
|
||||
anime Animecoin (ANI)\n\
|
||||
argon2 Argon2 Coin (AR2)\n\
|
||||
argon2d250 argon2d-crds, Credits (CRDS)\n\
|
||||
argon2d500 argon2d-dyn, Dynamic (DYN)\n\
|
||||
argon2d4096 argon2d-uis, Unitus (UIS)\n\
|
||||
axiom Shabal-256 MemoHash\n\
|
||||
bastion\n\
|
||||
blake blake256r14 (SFR)\n\
|
||||
blakecoin blake256r8\n\
|
||||
blake2s Blake-2 S\n\
|
||||
bmw BMW 256\n\
|
||||
c11 Chaincoin\n\
|
||||
cryptolight Cryptonight-light\n\
|
||||
cryptonight cryptonote, Monero (XMR)\n\
|
||||
decred Blake256r14dcr\n\
|
||||
deep Deepcoin (DCN)\n\
|
||||
dmd-gr Diamond\n\
|
||||
drop Dropcoin\n\
|
||||
fresh Fresh\n\
|
||||
groestl Groestl coin\n\
|
||||
heavy Heavy\n\
|
||||
hmq1725 Espers\n\
|
||||
hodl Hodlcoin\n\
|
||||
jha jackppot (Jackpotcoin)\n\
|
||||
keccak Maxcoin\n\
|
||||
keccakc Creative Coin\n\
|
||||
lbry LBC, LBRY Credits\n\
|
||||
luffa Luffa\n\
|
||||
lyra2h Hppcoin\n\
|
||||
lyra2re lyra2\n\
|
||||
lyra2rev2 lyrav2, Vertcoin\n\
|
||||
lyra2z Zcoin (XZC)\n\
|
||||
lyra2z330 Lyra2 330 rows, Zoin (ZOI)\n\
|
||||
m7m Magi (XMG)\n\
|
||||
myr-gr Myriad-Groestl\n\
|
||||
neoscrypt NeoScrypt(128, 2, 1)\n\
|
||||
nist5 Nist5\n\
|
||||
pentablake 5 x blake512\n\
|
||||
phi1612 phi, LUX coin\n\
|
||||
pluck Pluck:128 (Supcoin)\n\
|
||||
blake blake256r14 (SFR)\n\
|
||||
blakecoin blake256r8\n\
|
||||
blake2s Blake-2 S\n\
|
||||
bmw BMW 256\n\
|
||||
c11 Chaincoin\n\
|
||||
cryptolight Cryptonight-light\n\
|
||||
cryptonight Cryptonote legacy\n\
|
||||
cryptonightv7 variant 7, Monero (XMR)\n\
|
||||
decred Blake256r14dcr\n\
|
||||
deep Deepcoin (DCN)\n\
|
||||
dmd-gr Diamond\n\
|
||||
drop Dropcoin\n\
|
||||
fresh Fresh\n\
|
||||
groestl Groestl coin\n\
|
||||
heavy Heavy\n\
|
||||
hmq1725 Espers\n\
|
||||
hodl Hodlcoin\n\
|
||||
jha jackppot (Jackpotcoin)\n\
|
||||
keccak Maxcoin\n\
|
||||
keccakc Creative Coin\n\
|
||||
lbry LBC, LBRY Credits\n\
|
||||
luffa Luffa\n\
|
||||
lyra2h Hppcoin\n\
|
||||
lyra2re lyra2\n\
|
||||
lyra2rev2 lyrav2, Vertcoin\n\
|
||||
lyra2z Zcoin (XZC)\n\
|
||||
lyra2z330 Lyra2 330 rows, Zoin (ZOI)\n\
|
||||
m7m Magi (XMG)\n\
|
||||
myr-gr Myriad-Groestl\n\
|
||||
neoscrypt NeoScrypt(128, 2, 1)\n\
|
||||
nist5 Nist5\n\
|
||||
pentablake 5 x blake512\n\
|
||||
phi1612 phi, LUX coin\n\
|
||||
pluck Pluck:128 (Supcoin)\n\
|
||||
polytimos\n\
|
||||
quark Quark\n\
|
||||
qubit Qubit\n\
|
||||
scrypt scrypt(1024, 1, 1) (default)\n\
|
||||
scrypt:N scrypt(N, 1, 1)\n\
|
||||
quark Quark\n\
|
||||
qubit Qubit\n\
|
||||
scrypt scrypt(1024, 1, 1) (default)\n\
|
||||
scrypt:N scrypt(N, 1, 1)\n\
|
||||
scryptjane:nf\n\
|
||||
sha256d Double SHA-256\n\
|
||||
sha256t Triple SHA-256, Onecoin (OC)\n\
|
||||
shavite3 Shavite3\n\
|
||||
skein Skein+Sha (Skeincoin)\n\
|
||||
skein2 Double Skein (Woodcoin)\n\
|
||||
skunk Signatum (SIGT)\n\
|
||||
timetravel timeravel8, Machinecoin (MAC)\n\
|
||||
timetravel10 Bitcore (BTX)\n\
|
||||
tribus Denarius (DNR)\n\
|
||||
vanilla blake256r8vnl (VCash)\n\
|
||||
sha256d Double SHA-256\n\
|
||||
sha256t Triple SHA-256, Onecoin (OC)\n\
|
||||
shavite3 Shavite3\n\
|
||||
skein Skein+Sha (Skeincoin)\n\
|
||||
skein2 Double Skein (Woodcoin)\n\
|
||||
skunk Signatum (SIGT)\n\
|
||||
timetravel timeravel8, Machinecoin (MAC)\n\
|
||||
timetravel10 Bitcore (BTX)\n\
|
||||
tribus Denarius (DNR)\n\
|
||||
vanilla blake256r8vnl (VCash)\n\
|
||||
veltor\n\
|
||||
whirlpool\n\
|
||||
whirlpoolx\n\
|
||||
x11 Dash\n\
|
||||
x11evo Revolvercoin (XRE)\n\
|
||||
x11gost sib (SibCoin)\n\
|
||||
x12 Galaxie Cash (GCH)\n\
|
||||
x13 X13\n\
|
||||
x13sm3 hsr (Hshare)\n\
|
||||
x14 X14\n\
|
||||
x15 X15\n\
|
||||
x16r Ravencoin (RVN)\n\
|
||||
x16s Pigeoncoin (PGN)\n\
|
||||
x11 Dash\n\
|
||||
x11evo Revolvercoin (XRE)\n\
|
||||
x11gost sib (SibCoin)\n\
|
||||
x12 Galaxie Cash (GCH)\n\
|
||||
x13 X13\n\
|
||||
x13sm3 hsr (Hshare)\n\
|
||||
x14 X14\n\
|
||||
x15 X15\n\
|
||||
x16r Ravencoin (RVN)\n\
|
||||
x16s Pigeoncoin (PGN)\n\
|
||||
x17\n\
|
||||
xevan Bitsend (BSD)\n\
|
||||
yescrypt Globlboost-Y (BSTY)\n\
|
||||
yescryptr8 BitZeny (ZNY)\n\
|
||||
yescryptr16 Yenten (YTN)\n\
|
||||
yescryptr32 WAVI\n\
|
||||
zr5 Ziftr\n\
|
||||
xevan Bitsend (BSD)\n\
|
||||
yescrypt Globlboost-Y (BSTY)\n\
|
||||
yescryptr8 BitZeny (ZNY)\n\
|
||||
yescryptr16 Yenten (YTN)\n\
|
||||
yescryptr32 WAVI\n\
|
||||
zr5 Ziftr\n\
|
||||
-o, --url=URL URL of mining server\n\
|
||||
-O, --userpass=U:P username:password pair for mining server\n\
|
||||
-u, --user=USERNAME username for mining server\n\
|
||||
|
16
sysinfos.c
16
sysinfos.c
@@ -274,6 +274,7 @@ void cpu_getmodelid(char *outbuf, size_t maxsz)
|
||||
#define SSE2_Flag (1<<26)
|
||||
|
||||
#define AVX2_Flag (1<< 5) // ADV EBX
|
||||
#define AVX512F_Flag (1<<16)
|
||||
#define SHA_Flag (1<<29)
|
||||
|
||||
// Use this to detect presence of feature
|
||||
@@ -350,6 +351,21 @@ static inline bool has_avx2_()
|
||||
|
||||
bool has_avx2() { return has_avx2_(); }
|
||||
|
||||
static inline bool has_avx512f_()
|
||||
{
|
||||
#ifdef __arm__
|
||||
return false;
|
||||
#else
|
||||
int cpu_info[4] = { 0 };
|
||||
cpuid( EXTENDED_FEATURES, cpu_info );
|
||||
return cpu_info[ EBX_Reg ] & AVX512F_Flag;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool has_avx512f() { return has_avx512f_(); }
|
||||
|
||||
|
||||
// AMD only
|
||||
static inline bool has_xop_()
|
||||
{
|
||||
#ifdef __arm__
|
||||
|
@@ -51,11 +51,13 @@ rm -f config.status
|
||||
CFLAGS="-O3 -march=corei7-avx -Wall" ./configure $F
|
||||
make
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-aes-avx.exe
|
||||
mv cpuminer.exe release/cpuminer-avx.exe
|
||||
|
||||
# -march=westmere is supported in gcc5
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure $F
|
||||
CFLAGS="-O3 -march=westmere -Wall" ./configure $F
|
||||
#CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure $F
|
||||
make
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
||||
|
Reference in New Issue
Block a user