mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.5.0
This commit is contained in:
@@ -77,8 +77,6 @@ cpuminer_SOURCES = \
|
|||||||
algo/fresh.c \
|
algo/fresh.c \
|
||||||
algo/groestl/groestl.c \
|
algo/groestl/groestl.c \
|
||||||
algo/groestl/myr-groestl.c \
|
algo/groestl/myr-groestl.c \
|
||||||
algo/groestl/sse2/grso.c\
|
|
||||||
algo/groestl/sse2/grso-asm.c\
|
|
||||||
algo/groestl/aes_ni/hash-groestl.c \
|
algo/groestl/aes_ni/hash-groestl.c \
|
||||||
algo/groestl/aes_ni/hash-groestl256.c \
|
algo/groestl/aes_ni/hash-groestl256.c \
|
||||||
algo/haval/haval.c\
|
algo/haval/haval.c\
|
||||||
|
359
NEWS
359
NEWS
@@ -1,359 +0,0 @@
|
|||||||
Version 1.2 (Tanguy Pruvot)
|
|
||||||
- Add cryptonight-light (Aeon)
|
|
||||||
- Add Lyra2REv2 algo (Vertcoin)
|
|
||||||
- Allow to load a remote config with curl
|
|
||||||
- Algorithm parameter is now case insensitive
|
|
||||||
- Drop anime algo (dead coin)
|
|
||||||
- Add Sib(coin) algo
|
|
||||||
- Compute and show network diff in pools too
|
|
||||||
- Checkup on arm, tested ok on Tegra K1 (CyanogenMod 12.1)
|
|
||||||
|
|
||||||
version 1.1 (Tanguy Pruvot)
|
|
||||||
- Add basic API remote control (quit/seturl)
|
|
||||||
- Add GroestlCoin, Diamond and Myriad variants
|
|
||||||
- Add Pluck algo and fix gbt query crash
|
|
||||||
- Add ZR5 algo (ZRC) and fix longpoll bug on linux
|
|
||||||
- Add Luffa algo
|
|
||||||
- Add Skein2 algo (Double Skein for Woodcoin)
|
|
||||||
- Add Animecoin algo (Quark variant)
|
|
||||||
- Add Dropcoin pok algo
|
|
||||||
- Add BMW-256 (MDT) algo
|
|
||||||
- Add Axiom algo
|
|
||||||
- Change some logged strings
|
|
||||||
- Use all cores by default, not N-1
|
|
||||||
- Handle a default config to run without params
|
|
||||||
- add cpu-priority and cpu-affinity options
|
|
||||||
- add NSIS installer script for windows setup
|
|
||||||
- Implement background option on windows
|
|
||||||
- add -m stratum option (diff-multiplier)
|
|
||||||
- Time limit to allow benchmarks or cron jobs
|
|
||||||
- Fix Cryptonight stratum support
|
|
||||||
- Allow to disable extranonce support
|
|
||||||
|
|
||||||
version 1.0.9 (Tanguy Pruvot)
|
|
||||||
- pool extranonce subscribe
|
|
||||||
- upgrade jansson
|
|
||||||
- lyra2 algo
|
|
||||||
- fix for solo mining
|
|
||||||
- API websocket support
|
|
||||||
|
|
||||||
Version 1.0.8 (Tanguy Pruvot)
|
|
||||||
- API Monitoring Support
|
|
||||||
- Enhance config values support (int/real/bool)
|
|
||||||
- Rewrite blake algo (speed x2)
|
|
||||||
|
|
||||||
Version 1.0.7 (Tanguy Pruvot)
|
|
||||||
- Add NIST5 and QUBIT algos
|
|
||||||
- Show current stratum block height
|
|
||||||
- Fix wallet solo mining
|
|
||||||
|
|
||||||
Version 1.0.6 (Tanguy Pruvot)
|
|
||||||
- Fix scrypt algo
|
|
||||||
- More work on VC2013
|
|
||||||
- Add -f tuning option to test with reduced difficulty
|
|
||||||
- Add S3 algo
|
|
||||||
|
|
||||||
Version 1.0.5 (Tanguy Pruvot)
|
|
||||||
|
|
||||||
- Merge remaining v2.4 cpu-miner changes
|
|
||||||
- Add colored output (disable with --no-color)
|
|
||||||
- Test and fix blake on NEOS, needs 14 rounds (was 8)
|
|
||||||
- Add pentablake (5x blake256) (from bitbandi)
|
|
||||||
- Add neoscrypt
|
|
||||||
- Windows (VC++ 2013 and MinGW64 build support)
|
|
||||||
- Enhance --version information (compiler + lib versions)
|
|
||||||
|
|
||||||
Version 1.0.4 (Tanguy Pruvot)
|
|
||||||
|
|
||||||
- Add x13 x14 and x15 algos (Sherlockcoin, X14Coin, Webcoin..)
|
|
||||||
- Add scrypt:N variants (Vertcoin)
|
|
||||||
- Add fresh algo
|
|
||||||
- Fix thread khashes/s value output
|
|
||||||
- Add a configure option --disable-assembly
|
|
||||||
|
|
||||||
Version multi 1.0.3 (Lucas Jones)
|
|
||||||
|
|
||||||
- Add new algos :
|
|
||||||
x11 (Darkcoin [DRK], Hirocoin, Limecoin)
|
|
||||||
cryptonight (Bytecoin [BCN], Monero)
|
|
||||||
keccak (Maxcoin HelixCoin, CryptoMeth, Galleon, 365coin, Slothcoin, BitcointalkCoin)
|
|
||||||
hefty1 (Heavycoin)
|
|
||||||
quark (Quarkcoin)
|
|
||||||
skein (Skeincoin, Myriadcoin)
|
|
||||||
shavite3 (INKcoin)
|
|
||||||
blake (Blakecoin)
|
|
||||||
|
|
||||||
- See README.md
|
|
||||||
|
|
||||||
Version 2.4 - May 20, 2014
|
|
||||||
|
|
||||||
- Add support for the getblocktemplate RPC method (BIP 22)
|
|
||||||
- Allow tunnelling Stratum through HTTP proxies
|
|
||||||
- Add a --no-redirect option to ignore redirection requests
|
|
||||||
- Timeout for long polling is now disabled by default
|
|
||||||
- Fix CPU affinity on Linux (kiyominer)
|
|
||||||
- Add support for building under 64-bit Cygwin
|
|
||||||
- Expand version information with build details
|
|
||||||
|
|
||||||
Version 2.3.3 - Feb 27, 2014
|
|
||||||
|
|
||||||
- The --url option is now mandatory
|
|
||||||
- Do not switch to Stratum when using an HTTP proxy
|
|
||||||
- Fix scheduling policy change on Linux (clbr)
|
|
||||||
- Fix CPU affinity on FreeBSD (ache)
|
|
||||||
- Compatibility fixes for various platforms, including Solaris 8
|
|
||||||
and old versions of OS X
|
|
||||||
- A man page for minerd is now available
|
|
||||||
|
|
||||||
Version 2.3.2 - Jul 10, 2013
|
|
||||||
|
|
||||||
- Add optimizations for AVX2-capable x86-64 processors
|
|
||||||
- Ensure that the output stream is flushed after every log message
|
|
||||||
- Fix an undefined-behavior bug in the Stratum code
|
|
||||||
|
|
||||||
Version 2.3.1 - Jun 18, 2013
|
|
||||||
|
|
||||||
- Add a --cert option for specifying an SSL certificate (martinwguy)
|
|
||||||
- Fix a bug that only made SHA-256d mining work at difficulty 1
|
|
||||||
- Fix a couple of compatibility issues with some Stratum servers
|
|
||||||
|
|
||||||
Version 2.3 - Jun 12, 2013
|
|
||||||
|
|
||||||
- Add support for the Stratum mining protocol
|
|
||||||
- Automatically switch to Stratum if the mining server supports
|
|
||||||
the X-Stratum extension, unless --no-stratum is used
|
|
||||||
- Set CPU affinity on FreeBSD (lye)
|
|
||||||
- Fix a bug in libcurl initialization (martinwguy)
|
|
||||||
|
|
||||||
Version 2.2.3 - Aug 5, 2012
|
|
||||||
|
|
||||||
- Add optimized ARM NEON code for scrypt and SHA-256d
|
|
||||||
- Add a --benchmark option that allows offline testing
|
|
||||||
- Support for the X-Reject-Reason extension
|
|
||||||
|
|
||||||
Version 2.2.2 - Jun 7, 2012
|
|
||||||
|
|
||||||
- Various performance improvements for x86 and x86-64
|
|
||||||
- Optimize scrypt for ARMv5E and later processors
|
|
||||||
- Set the priority of miner threads to idle on Windows
|
|
||||||
- Add an option to start minerd as a daemon on POSIX systems
|
|
||||||
|
|
||||||
Version 2.2.1 - May 2, 2012
|
|
||||||
|
|
||||||
- Add optimized code for ARM processors
|
|
||||||
- Support for building on NetBSD and OpenBSD
|
|
||||||
- Various compatibility fixes for AIX (pontius)
|
|
||||||
|
|
||||||
Version 2.2 - Apr 2, 2012
|
|
||||||
|
|
||||||
- Add an optimized SHA-256d algorithm, with specialized code
|
|
||||||
for x86 and x86-64 and support for AVX and XOP instructions
|
|
||||||
- Slight performance increase for scrypt on x86 and x86-64
|
|
||||||
- The default timeout is now 270 seconds
|
|
||||||
|
|
||||||
Version 2.1.5 - Mar 7, 2012
|
|
||||||
|
|
||||||
- Add optimizations for AVX-capable x86-64 processors
|
|
||||||
- Assume HTTP if no protocol is specified for the mining server
|
|
||||||
- Fix MinGW compatibility issues and update build instructions
|
|
||||||
- Add support for building on Solaris using gcc (pontius)
|
|
||||||
|
|
||||||
Version 2.1.4 - Feb 28, 2012
|
|
||||||
|
|
||||||
- Implement 4-way SHA-256 on x86-64
|
|
||||||
- Add TCP keepalive to long polling connections
|
|
||||||
- Support HTTP and SOCKS proxies via the --proxy option
|
|
||||||
- Username and password are no longer mandatory
|
|
||||||
- Add a script that makes assembly code compatible with old versions
|
|
||||||
of the GNU assembler that do not support macros
|
|
||||||
|
|
||||||
Version 2.1.3 - Feb 12, 2012
|
|
||||||
|
|
||||||
- Smart handling of long polling failures: switch to short scan time
|
|
||||||
if long polling fails, and only try to reactivate it if the server
|
|
||||||
continues to advertise the feature in HTTP headers
|
|
||||||
- Add "X-Mining-Extensions: midstate" to HTTP headers (p2k)
|
|
||||||
- Add support for the "submitold" extension, used by p2pool
|
|
||||||
- It is now possible to specify username and password in the URL,
|
|
||||||
like this: http://username:password@host:port/
|
|
||||||
- Add a --version option, and clean up --help output
|
|
||||||
- Avoid division by zero when computing hash rates
|
|
||||||
- Handle empty responses properly (TimothyA)
|
|
||||||
- Eliminate the delay between starting threads
|
|
||||||
|
|
||||||
Version 2.1.2 - Jan 26, 2012
|
|
||||||
|
|
||||||
- Do not submit work that is known to be stale
|
|
||||||
- Allow miner threads to ask for new work if the current one is at least
|
|
||||||
45 seconds old and long polling is enabled
|
|
||||||
- Refresh work when long polling times out
|
|
||||||
- Fix minor speed regression
|
|
||||||
- Modify x86-64 code to make it compatible with older versions of binutils
|
|
||||||
|
|
||||||
Version 2.1.1 - Jan 20, 2012
|
|
||||||
|
|
||||||
- Handle network errors properly
|
|
||||||
- Make scantime retargeting more accurate
|
|
||||||
|
|
||||||
Version 2.1 - Jan 19, 2012
|
|
||||||
|
|
||||||
- Share the same work among all threads
|
|
||||||
- Do not ask for new work if the current one is not expired
|
|
||||||
- Do not discard the work returned by long polling
|
|
||||||
|
|
||||||
Version 2.0 - Jan 16, 2012
|
|
||||||
|
|
||||||
- Change default port to 9332 for Litecoin and remove default credentials
|
|
||||||
- Add 'scrypt' as the default algorithm and remove other algorithms (ArtForz)
|
|
||||||
- Optimize scrypt for x86 and x86-64
|
|
||||||
- Make scantime retargeting less granular (ArtForz)
|
|
||||||
- Test the whole hash instead of just looking at the high 32 bits
|
|
||||||
- Add configurable timeout, with a default of 180 seconds
|
|
||||||
- Add share summary output (inlikeflynn)
|
|
||||||
- Fix priority and CPU count detection on Windows
|
|
||||||
- Fix parameters -u and -p, and add short options -o and -O
|
|
||||||
|
|
||||||
Version 1.0.2 - Jun 13, 2011
|
|
||||||
|
|
||||||
- Linux x86_64 optimisations - Con Kolivas
|
|
||||||
- Optimise for x86_64 by default by using sse2_64 algo
|
|
||||||
- Detects CPUs and sets number of threads accordingly
|
|
||||||
- Uses CPU affinity for each thread where appropriate
|
|
||||||
- Sets scheduling policy to lowest possible
|
|
||||||
- Minor performance tweaks
|
|
||||||
|
|
||||||
Version 1.0.1 - May 14, 2011
|
|
||||||
|
|
||||||
- OSX support
|
|
||||||
|
|
||||||
Version 1.0 - May 9, 2011
|
|
||||||
|
|
||||||
- jansson 2.0 compatibility
|
|
||||||
- correct off-by-one in date (month) display output
|
|
||||||
- fix platform detection
|
|
||||||
- improve yasm configure bits
|
|
||||||
- support full URL, in X-Long-Polling header
|
|
||||||
|
|
||||||
Version 0.8.1 - March 22, 2011
|
|
||||||
|
|
||||||
- Make --user, --pass actually work
|
|
||||||
|
|
||||||
- Add User-Agent HTTP header to requests, so that server operators may
|
|
||||||
more easily identify the miner client.
|
|
||||||
|
|
||||||
- Fix minor bug in example JSON config file
|
|
||||||
|
|
||||||
Version 0.8 - March 21, 2011
|
|
||||||
|
|
||||||
- Support long polling: http://deepbit.net/longpolling.php
|
|
||||||
|
|
||||||
- Adjust max workload based on scantime (default 5 seconds,
|
|
||||||
or 60 seconds for longpoll)
|
|
||||||
|
|
||||||
- Standardize program output, and support syslog on Unix platforms
|
|
||||||
|
|
||||||
- Support --user/--pass options (and "user" and "pass" in config file),
|
|
||||||
as an alternative to the current --userpass
|
|
||||||
|
|
||||||
Version 0.7.2 - March 14, 2011
|
|
||||||
|
|
||||||
- Add port of ufasoft's sse2 assembly implementation (Linux only)
|
|
||||||
This is a substantial speed improvement on Intel CPUs.
|
|
||||||
|
|
||||||
- Move all JSON-RPC I/O to separate thread. This reduces the
|
|
||||||
number of HTTP connections from one-per-thread to one, reducing resource
|
|
||||||
usage on upstream bitcoind / pool server.
|
|
||||||
|
|
||||||
Version 0.7.1 - March 2, 2011
|
|
||||||
|
|
||||||
- Add support for JSON-format configuration file. See example
|
|
||||||
file example-cfg.json. Any long argument on the command line
|
|
||||||
may be stored in the config file.
|
|
||||||
- Timestamp each solution found
|
|
||||||
- Improve sha256_4way performance. NOTE: This optimization makes
|
|
||||||
the 'hash' debug-print output for sha256_way incorrect.
|
|
||||||
- Use __builtin_expect() intrinsic as compiler micro-optimization
|
|
||||||
- Build on Intel compiler
|
|
||||||
- HTTP library now follows HTTP redirects
|
|
||||||
|
|
||||||
Version 0.7 - February 12, 2011
|
|
||||||
|
|
||||||
- Re-use CURL object, thereby reusing DNS cache and HTTP connections
|
|
||||||
- Use bswap_32, if compiler intrinsic is not available
|
|
||||||
- Disable full target validation (as opposed to simply H==0) for now
|
|
||||||
|
|
||||||
Version 0.6.1 - February 4, 2011
|
|
||||||
|
|
||||||
- Fully validate "hash < target", rather than simply stopping our scan
|
|
||||||
if the high 32 bits are 00000000.
|
|
||||||
- Add --retry-pause, to set length of pause time between failure retries
|
|
||||||
- Display proof-of-work hash and target, if -D (debug mode) enabled
|
|
||||||
- Fix max-nonce auto-adjustment to actually work. This means if your
|
|
||||||
scan takes longer than 5 seconds (--scantime), the miner will slowly
|
|
||||||
reduce the number of hashes you work on, before fetching a new work unit.
|
|
||||||
|
|
||||||
Version 0.6 - January 29, 2011
|
|
||||||
|
|
||||||
- Fetch new work unit, if scanhash takes longer than 5 seconds (--scantime)
|
|
||||||
- BeeCee1's sha256 4way optimizations
|
|
||||||
- lfm's byte swap optimization (improves via, cryptopp)
|
|
||||||
- Fix non-working short options -q, -r
|
|
||||||
|
|
||||||
Version 0.5 - December 28, 2010
|
|
||||||
|
|
||||||
- Exit program, when all threads have exited
|
|
||||||
- Improve JSON-RPC failure diagnostics and resilience
|
|
||||||
- Add --quiet option, to disable hashmeter output.
|
|
||||||
|
|
||||||
Version 0.3.3 - December 27, 2010
|
|
||||||
|
|
||||||
- Critical fix for sha256_cryptopp 'cryptopp_asm' algo
|
|
||||||
|
|
||||||
Version 0.3.2 - December 23, 2010
|
|
||||||
|
|
||||||
- Critical fix for sha256_via
|
|
||||||
|
|
||||||
Version 0.3.1 - December 19, 2010
|
|
||||||
|
|
||||||
- Critical fix for sha256_via
|
|
||||||
- Retry JSON-RPC failures (see --retry, under "--help" output)
|
|
||||||
|
|
||||||
Version 0.3 - December 18, 2010
|
|
||||||
|
|
||||||
- Add crypto++ 32bit assembly implementation
|
|
||||||
- show version upon 'minerd --help'
|
|
||||||
- work around gcc 4.5.x bug that killed 4way performance
|
|
||||||
|
|
||||||
Version 0.2.2 - December 6, 2010
|
|
||||||
|
|
||||||
- VIA padlock implementation works now
|
|
||||||
- Minor build and runtime fixes
|
|
||||||
|
|
||||||
Version 0.2.1 - November 29, 2010
|
|
||||||
|
|
||||||
- avoid buffer overflow when submitting solutions
|
|
||||||
- add Crypto++ sha256 implementation (C only, ASM elided for now)
|
|
||||||
- minor internal optimizations and cleanups
|
|
||||||
|
|
||||||
Version 0.2 - November 27, 2010
|
|
||||||
|
|
||||||
- Add script for building a Windows installer
|
|
||||||
- improve hash performance (hashmeter) statistics
|
|
||||||
- add tcatm 4way sha256 implementation
|
|
||||||
- Add experimental VIA Padlock sha256 implementation
|
|
||||||
|
|
||||||
Version 0.1.2 - November 26, 2010
|
|
||||||
|
|
||||||
- many small cleanups and micro-optimizations
|
|
||||||
- build win32 exe using mingw
|
|
||||||
- RPC URL, username/password become command line arguments
|
|
||||||
- remove unused OpenSSL dependency
|
|
||||||
|
|
||||||
Version 0.1.1 - November 24, 2010
|
|
||||||
|
|
||||||
- Do not build sha256_generic module separately from cpuminer.
|
|
||||||
|
|
||||||
Version 0.1 - November 24, 2010
|
|
||||||
|
|
||||||
- Initial release.
|
|
||||||
|
|
333
README.md
333
README.md
@@ -1,262 +1,117 @@
|
|||||||
This project is forked by Jay D Dee.
|
cpuminer-opt is a fork of cpuminer-multi by TPruvot with optimizations
|
||||||
|
imported from other miners developed by Lucas Jones, djm34, Wolf0, pooler,
|
||||||
|
Jeff Garzik, ig0tik3d, elmad, palmd, and Optiminer, with additional
|
||||||
|
optimizations by Jay D Dee.
|
||||||
|
|
||||||
Updated for v3.3.2 Windows support.
|
All of the code is believed to be open and free. If anyone has a
|
||||||
|
claim to any of it, post your case in the cpuminer-opt Bitcoin Talk forum
|
||||||
|
or by email.
|
||||||
|
|
||||||
Building on linux prerequisites:
|
https://bitcointalk.org/index.php?topic=1326803.0
|
||||||
|
|
||||||
It is assumed users know how to install packages on their system and
|
mailto:jayddee246@gmail.com
|
||||||
be able to compile standard source packages. This is basic Linux and
|
|
||||||
beyond the scope of cpuminer-opt.
|
|
||||||
|
|
||||||
Make sure you have the basic development packages installed.
|
See file RELEASE_NOTES for change log and compile instructions.
|
||||||
Here is a good start:
|
|
||||||
|
|
||||||
http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu
|
Supported Algorithms
|
||||||
|
--------------------
|
||||||
|
|
||||||
Install any additional dependencies needed by cpuminer-opt. The list below
|
argon2
|
||||||
are some of the ones that may not be in the default install and need to
|
axiom Shabal-256 MemoHash
|
||||||
be installed manually. There may be others, read the error messages they
|
bastion
|
||||||
will give a clue as to the missing package.
|
blake Blake-256 (SFR)
|
||||||
|
blakecoin blake256r8
|
||||||
|
blake2s Blake-2 S
|
||||||
|
bmw BMW 256
|
||||||
|
c11 Flax
|
||||||
|
cryptolight Cryptonight-light
|
||||||
|
cryptonight cryptonote, Monero (XMR)
|
||||||
|
decred
|
||||||
|
drop Dropcoin
|
||||||
|
fresh Fresh
|
||||||
|
groestl groestl
|
||||||
|
heavy Heavy
|
||||||
|
hmq1725 Espers
|
||||||
|
hodl Hodlcoin
|
||||||
|
keccak Keccak
|
||||||
|
lbry LBC, LBRY Credits
|
||||||
|
luffa Luffa
|
||||||
|
lyra2re lyra2
|
||||||
|
lyra2rev2 lyrav2
|
||||||
|
lyra2z Zcoin (XZC)
|
||||||
|
lyra2zoin Zoin (ZOI)
|
||||||
|
m7m Magi (XMG)
|
||||||
|
myr-gr Myriad-Groestl
|
||||||
|
neoscrypt NeoScrypt(128, 2, 1)
|
||||||
|
nist5 Nist5
|
||||||
|
pluck Pluck:128 (Supcoin)
|
||||||
|
pentablake Pentablake
|
||||||
|
quark Quark
|
||||||
|
qubit Qubit
|
||||||
|
scrypt scrypt(1024, 1, 1) (default)
|
||||||
|
scrypt:N scrypt(N, 1, 1)
|
||||||
|
scryptjane:nf
|
||||||
|
sha256d SHA-256d
|
||||||
|
shavite3 Shavite3
|
||||||
|
skein Skein+Sha (Skeincoin)
|
||||||
|
skein2 Double Skein (Woodcoin)
|
||||||
|
vanilla blake256r8vnl (VCash)
|
||||||
|
veltor
|
||||||
|
whirlpool
|
||||||
|
whirlpoolx
|
||||||
|
x11 X11
|
||||||
|
x11evo Revolvercoin
|
||||||
|
x11gost sib (SibCoin)
|
||||||
|
x13 X13
|
||||||
|
x14 X14
|
||||||
|
x15 X15
|
||||||
|
x17
|
||||||
|
xevan Bitsend
|
||||||
|
yescrypt
|
||||||
|
zr5 Ziftr
|
||||||
|
|
||||||
The following command should install everything you need on Debian based
|
Requirements
|
||||||
packages:
|
------------
|
||||||
|
|
||||||
sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev automake
|
1. A x86_64 architecture CPU with a minimum of SSE2 support. This includes
|
||||||
|
Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
|
||||||
|
optimizations a CPU with AES_NI is required. This includes Intel Westmere
|
||||||
|
and newer and AMD equivalents. Further optimizations are available on some
|
||||||
|
algorithms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
|
||||||
|
|
||||||
Building on Linux, see below for Windows.
|
Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
|
||||||
|
performance.
|
||||||
|
|
||||||
Dependencies
|
2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
|
||||||
|
Centos are known to work and have all dependencies in their repositories.
|
||||||
|
Others may work but may require more effort. 64 bit Windows OS is now supported
|
||||||
|
with mingw_w64 and msys.
|
||||||
|
|
||||||
build-essential (for Ubuntu, Development Tools package group on Fedora)
|
3. Stratum pool, cpuminer-opt only supports stratum mining.
|
||||||
automake
|
|
||||||
libjansson-dev
|
|
||||||
libgmp-dev
|
|
||||||
libcurl4-openssl-dev
|
|
||||||
libssl-dev
|
|
||||||
pthreads
|
|
||||||
zlib
|
|
||||||
|
|
||||||
tar xvzf [file.tar.gz]
|
Errata
|
||||||
cd [file]
|
------
|
||||||
|
|
||||||
Run build.sh to build on Linux or execute the following commands.
|
cpuminer-opt does not work mining Decred algo at Nicehash and produces
|
||||||
|
only "invalid extranonce2 size" rejects. It works at Zpool.
|
||||||
|
|
||||||
./autogen.sh
|
Benchmark testing does not work for x11evo.
|
||||||
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
|
||||||
make
|
|
||||||
|
|
||||||
Start mining.
|
Bugs
|
||||||
|
----
|
||||||
|
|
||||||
./cpuminer -a algo ...
|
Users are encouraged to post their bug reports on the Bitcoin Talk
|
||||||
|
forum at:
|
||||||
|
|
||||||
Building on Windows prerequisites:
|
https://bitcointalk.org/index.php?topic=1326803.0
|
||||||
|
|
||||||
msys
|
|
||||||
mingw_w64
|
|
||||||
Visual C++ redistributable 2008 X64
|
|
||||||
openssl, not sure about this
|
|
||||||
|
|
||||||
Install msys and mingw_w64, only needed once.
|
|
||||||
|
|
||||||
Unpack msys into C:\msys or your preferred directory.
|
|
||||||
|
|
||||||
Install mingw_w64 from win-builds.
|
|
||||||
Follow instructions, check "msys or cygwin" and "x86_64" and accept default
|
|
||||||
existing msys installation.
|
|
||||||
|
|
||||||
Open a msys shell by double clicking on msys.bat.
|
|
||||||
Note that msys shell uses linux syntax for file specifications, "C:\" is
|
|
||||||
mounted at "/c/".
|
|
||||||
|
|
||||||
Add mingw bin directory to PATH variable
|
|
||||||
PATH="/c/msys/opt/windows_64/bin/:$PATH"
|
|
||||||
|
|
||||||
Installation complete, compile cpuminer-opt
|
|
||||||
|
|
||||||
Unpack cpuminer-opt source files using tar from msys shell, or using 7zip
|
|
||||||
or similar Windows program.
|
|
||||||
|
|
||||||
In msys shell cd to miner directory.
|
|
||||||
cd /c/path/to/cpuminer-opt
|
|
||||||
|
|
||||||
Run winbuild.sh to build on Windows or execute the following commands.
|
|
||||||
|
|
||||||
./autogen.sh
|
|
||||||
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
|
||||||
make
|
|
||||||
|
|
||||||
The following tips may be useful for older AMD CPUs.
|
|
||||||
|
|
||||||
Some users with AMD CPUs without AES_NI have reported problems compiling
|
|
||||||
with build.sh or "-march=native". Problems have included compile errors
|
|
||||||
and poor performance. These users are recommended to compile manually
|
|
||||||
specifying "-march=btver1" on the configure command line.
|
|
||||||
|
|
||||||
Support for even older x86_64 without AES_NI or SSE2 is not available.
|
|
||||||
cpuminer-multi by TPruvot supports this architecture.
|
|
||||||
|
|
||||||
The rest of this file is taken from cpuminer-multi.
|
|
||||||
|
|
||||||
----------------
|
|
||||||
|
|
||||||
|
|
||||||
CPUMiner-Multi
|
|
||||||
==============
|
|
||||||
|
|
||||||
[](https://travis-ci.org/tpruvot/cpuminer-multi)
|
|
||||||
|
|
||||||
This is a multi-threaded CPU miner,
|
|
||||||
fork of [pooler](//github.com/pooler)'s cpuminer (see AUTHORS for list of contributors).
|
|
||||||
|
|
||||||
#### Table of contents
|
|
||||||
|
|
||||||
* [Algorithms](#algorithms)
|
|
||||||
* [Dependencies](#dependencies)
|
|
||||||
* [Download](#download)
|
|
||||||
* [Build](#build)
|
|
||||||
* [Usage instructions](#usage-instructions)
|
|
||||||
* [Donations](#donations)
|
|
||||||
* [Credits](#credits)
|
|
||||||
* [License](#license)
|
|
||||||
|
|
||||||
Algorithms
|
|
||||||
==========
|
|
||||||
#### Currently supported
|
|
||||||
* ✓ __scrypt__ (Litecoin, Dogecoin, Feathercoin, ...)
|
|
||||||
* ✓ __scrypt:N__
|
|
||||||
* ✓ __sha256d__ (Bitcoin, Freicoin, Peercoin/PPCoin, Terracoin, ...)
|
|
||||||
* ✓ __axiom__ (Axiom Shabal-256 based MemoHash)
|
|
||||||
* ✓ __blake__ (Saffron [SFR] Blake-256)
|
|
||||||
* ✓ __bmw__ (Midnight [MDT] BMW-256)
|
|
||||||
* ✓ __cryptonight__ (Bytecoin [BCN], Monero)
|
|
||||||
* ✓ __cryptonight-light__ (Aeon)
|
|
||||||
* ✓ __dmd-gr__ (Diamond-Groestl)
|
|
||||||
* ✓ __fresh__ (FreshCoin)
|
|
||||||
* ✓ __groestl__ (Groestlcoin)
|
|
||||||
* ✓ __lyra2RE__ (Lyrabar, Cryptocoin)
|
|
||||||
* ✓ __lyra2REv2__ (VertCoin [VTC])
|
|
||||||
* ✓ __myr-gr__ (Myriad-Groestl)
|
|
||||||
* ✓ __neoscrypt__ (Feathercoin)
|
|
||||||
* ✓ __nist5__ (MistCoin [MIC], TalkCoin [TAC], ...)
|
|
||||||
* ✓ __pentablake__ (Joincoin)
|
|
||||||
* ✓ __pluck__ (Supcoin [SUP])
|
|
||||||
* ✓ __quark__ (Quarkcoin)
|
|
||||||
* ✓ __qubit__ (MyriadCoin [MYR])
|
|
||||||
* ✓ __skein__ (Skeincoin, Myriadcoin, Xedoscoin, ...)
|
|
||||||
* ✓ __skein2__ (Woodcoin)
|
|
||||||
* ✓ __s3__ (OneCoin)
|
|
||||||
* ✓ __x11__ (Darkcoin [DRK], Hirocoin, Limecoin, ...)
|
|
||||||
* ✓ __x13__ (Sherlockcoin, [ACE], [B2B], [GRC], [XHC], ...)
|
|
||||||
* ✓ __x14__ (X14, Webcoin [WEB])
|
|
||||||
* ✓ __x15__ (RadianceCoin [RCE])
|
|
||||||
* ✓ __zr5__ (Ziftrcoin [ZRC])
|
|
||||||
|
|
||||||
#### Implemented, but untested
|
|
||||||
* ? blake2s
|
|
||||||
* ? hefty1 (Heavycoin)
|
|
||||||
* ? keccak (Maxcoin HelixCoin, CryptoMeth, Galleon, 365coin, Slothcoin, BitcointalkCoin)
|
|
||||||
* ? luffa (Joincoin, Doomcoin)
|
|
||||||
* ? shavite3 (INKcoin)
|
|
||||||
* ? sib X11 + gost (SibCoin)
|
|
||||||
|
|
||||||
#### Planned support for
|
|
||||||
* *scrypt-jane* (YaCoin, CopperBars, Pennies, Tickets, etc..)
|
|
||||||
|
|
||||||
Dependencies
|
|
||||||
============
|
|
||||||
* libcurl http://curl.haxx.se/libcurl/
|
|
||||||
* jansson http://www.digip.org/jansson/ (jansson source is included in-tree)
|
|
||||||
* openssl libcrypto https://www.openssl.org/
|
|
||||||
* pthreads
|
|
||||||
* zlib (for curl/ssl)
|
|
||||||
|
|
||||||
Download
|
|
||||||
========
|
|
||||||
* Windows releases: https://github.com/tpruvot/cpuminer-multi/releases
|
|
||||||
* Git tree: https://github.com/tpruvot/cpuminer-multi
|
|
||||||
* Clone with `git clone https://github.com/tpruvot/cpuminer-multi`
|
|
||||||
|
|
||||||
Build
|
|
||||||
=====
|
|
||||||
|
|
||||||
#### Basic *nix build instructions:
|
|
||||||
* just use ./build.sh
|
|
||||||
_OR_
|
|
||||||
* ./autogen.sh # only needed if building from git repo
|
|
||||||
* ./nomacro.pl # only needed if building on Mac OS X or with Clang
|
|
||||||
* ./configure CFLAGS="-O3 -march=native" --with-crypto --with-curl
|
|
||||||
* # Use -march=native if building for a single machine
|
|
||||||
* make
|
|
||||||
|
|
||||||
#### Notes for AIX users:
|
|
||||||
* To build a 64-bit binary, export OBJECT_MODE=64
|
|
||||||
* GNU-style long options are not supported, but are accessible via configuration file
|
|
||||||
|
|
||||||
#### Basic Windows build with Visual Studio 2013
|
|
||||||
* All the required .lib files are now included in tree (windows only)
|
|
||||||
* AVX enabled by default for x64 platform (AVX2 and XOP could also be used)
|
|
||||||
|
|
||||||
#### Basic Windows build instructions, using MinGW64:
|
|
||||||
* Install MinGW64 and the MSYS Developer Tool Kit (http://www.mingw.org/)
|
|
||||||
* Make sure you have mstcpip.h in MinGW\include
|
|
||||||
* install pthreads-w64
|
|
||||||
* Install libcurl devel (http://curl.haxx.se/download.html)
|
|
||||||
* Make sure you have libcurl.m4 in MinGW\share\aclocal
|
|
||||||
* Make sure you have curl-config in MinGW\bin
|
|
||||||
* Install openssl devel (https://www.openssl.org/related/binaries.html)
|
|
||||||
* In the MSYS shell, run:
|
|
||||||
* for 64bit, you can use ./mingw64.sh else :
|
|
||||||
./autogen.sh # only needed if building from git repo
|
|
||||||
* LIBCURL="-lcurldll" ./configure CFLAGS="*-march=native*"
|
|
||||||
* # Use -march=native if building for a single machine
|
|
||||||
* make
|
|
||||||
|
|
||||||
#### Architecture-specific notes:
|
|
||||||
* ARM:
|
|
||||||
* No runtime CPU detection. The miner can take advantage of some instructions specific to ARMv5E and later processors, but the decision whether to use them is made at compile time, based on compiler-defined macros.
|
|
||||||
* To use NEON instructions, add "-mfpu=neon" to CFLAGS.
|
|
||||||
* x86:
|
|
||||||
* The miner checks for SSE2 instructions support at runtime, and uses them if they are available.
|
|
||||||
* x86-64:
|
|
||||||
* The miner can take advantage of AVX, AVX2 and XOP instructions, but only if both the CPU and the operating system support them.
|
|
||||||
* Linux supports AVX starting from kernel version 2.6.30.
|
|
||||||
* FreeBSD supports AVX starting with 9.1-RELEASE.
|
|
||||||
* Mac OS X added AVX support in the 10.6.8 update.
|
|
||||||
* Windows supports AVX starting from Windows 7 SP1 and Windows Server 2008 R2 SP1.
|
|
||||||
* The configure script outputs a warning if the assembler doesn't support some instruction sets. In that case, the miner can still be built, but unavailable optimizations are left off.
|
|
||||||
|
|
||||||
Usage instructions
|
|
||||||
==================
|
|
||||||
Run "cpuminer --help" to see options.
|
|
||||||
|
|
||||||
### Connecting through a proxy
|
|
||||||
|
|
||||||
Use the --proxy option.
|
|
||||||
|
|
||||||
To use a SOCKS proxy, add a socks4:// or socks5:// prefix to the proxy host
|
|
||||||
Protocols socks4a and socks5h, allowing remote name resolving, are also available since libcurl 7.18.0.
|
|
||||||
|
|
||||||
If no protocol is specified, the proxy is assumed to be a HTTP proxy.
|
|
||||||
When the --proxy option is not used, the program honors the http_proxy and all_proxy environment variables.
|
|
||||||
|
|
||||||
Donations
|
Donations
|
||||||
=========
|
---------
|
||||||
Donations for the work done in this fork are accepted :
|
|
||||||
|
|
||||||
Tanguy Pruvot :
|
I do not do this for money but I have a donation address if users
|
||||||
* BTC: `1FhDPLPpw18X4srecguG3MxJYe4a1JsZnd`
|
are so inclined.
|
||||||
* ZRC: `ZX6LmrCwphNgitxvDnf8TX6Tsegfxpeozx`
|
|
||||||
|
|
||||||
Lucas Jones :
|
bitcoin:12tdvfF7KmAsihBXQXynT6E6th2c2pByTT?label=donations
|
||||||
* MRO: `472haywQKoxFzf7asaQ4XKBc2foAY4ezk8HiN63ifW4iAbJiLnfmJfhHSR9XmVKw2WYPnszJV9MEHj9Z5WMK9VCNHaGLDmJ`
|
|
||||||
* BTC: `139QWoktddChHsZMWZFxmBva4FM96X2dhE`
|
|
||||||
|
|
||||||
Credits
|
Happy mining!
|
||||||
=======
|
|
||||||
CPUMiner-multi was forked from pooler's CPUMiner, and has been started by Lucas Jones.
|
|
||||||
* [tpruvot](https://github.com/tpruvot) added all the recent features and newer algorithms
|
|
||||||
* [Wolf9466](https://github.com/wolf9466) helped with Intel AES-NI support for CryptoNight
|
|
||||||
|
|
||||||
License
|
|
||||||
=======
|
|
||||||
GPLv2. See COPYING for details.
|
|
||||||
|
@@ -1,85 +0,0 @@
|
|||||||
cpuminer-opt now supports over 40 algorithms on CPUs with at least SSE2
|
|
||||||
capabilities including Intel Core2, Nehalem and AMD equivalent. See the
|
|
||||||
performance chart below for details.
|
|
||||||
|
|
||||||
In addition 19 algorithms have optimizations to take advantage of
|
|
||||||
CPUs with AES_NI for even greater performance, including the Intel
|
|
||||||
Westmere and newer and AMD equivalent. See the performance
|
|
||||||
comparison below.
|
|
||||||
|
|
||||||
New in 3.4.12
|
|
||||||
|
|
||||||
- lyra2z (zcoin) modified for blocks after 8192
|
|
||||||
- fixed scryptjane to support various N factors
|
|
||||||
|
|
||||||
Users with non-SSE2 CPUs or who want to mine algos not supported by
|
|
||||||
cpuminer-opt may find cpuminer-multi by TPruvot useful.
|
|
||||||
|
|
||||||
Chart out of date, will be removed.
|
|
||||||
|
|
||||||
The performance chart below is for an Intel i7-6700K @ 4 GHz, 16 GB mem.
|
|
||||||
|
|
||||||
Normalization rates have been added to the chart to help with profit
|
|
||||||
switching pools. Reference algo x11 = 1.
|
|
||||||
|
|
||||||
Due to the peculiarities of some algorithms their performance on other CPU
|
|
||||||
architectures may not scale equally. Their normalizations rates will also
|
|
||||||
differ from those listed below. YMMV.
|
|
||||||
|
|
||||||
Normalized profitability = algo profitability * norm rate
|
|
||||||
|
|
||||||
AES-AVX SSE2(1) norm rate(5)
|
|
||||||
------- ------- ---------
|
|
||||||
x11 780 K 525 K 1
|
|
||||||
x13 392 298 0.50
|
|
||||||
x14 370 271 0.48
|
|
||||||
x15 341 270 0.45
|
|
||||||
x17 317 248 0.43
|
|
||||||
x11gost 562 392 0.72
|
|
||||||
x11evo 590 387 0.78
|
|
||||||
quark 1195 924 1.61
|
|
||||||
qubit 1182 765 1.45
|
|
||||||
nist5 2000 1592 3.37
|
|
||||||
zr5 850 650 1.15
|
|
||||||
c11 784 475 0.99
|
|
||||||
myr-gr 1572 1560 2.12
|
|
||||||
hmq1725 214 161 0.29
|
|
||||||
m7m 121 77.4 0.155
|
|
||||||
lyra2re 1380 900 1.76
|
|
||||||
lyra2rev2 1350 980 1.73
|
|
||||||
cryptonight 290 H 165 H 0.00039
|
|
||||||
cryptolight 685 ? 0.00093
|
|
||||||
hodl 600 200 0.00081
|
|
||||||
lbry (4) 2620 3.53
|
|
||||||
neoscrypt (4) 32 K 0.043
|
|
||||||
argon2 (4) 33.7 0.045
|
|
||||||
groestl (4) 931 1.26
|
|
||||||
skein (4) 5747 7.77
|
|
||||||
skein2 (4) 8675 11.7
|
|
||||||
pentablake (4) 3960 5.35
|
|
||||||
keccak (4) 7790 10.5
|
|
||||||
scrypt (4) 113 0.153
|
|
||||||
sha256d (4) 62.5 0.084
|
|
||||||
veltor (4) 1017 1.30
|
|
||||||
blake (4) 22.4 M 30.4
|
|
||||||
blake2s (4) 19.0 25.7
|
|
||||||
vanilla (4) 33.0 44.6
|
|
||||||
blakecoin (4) 33.9 45.8
|
|
||||||
decred (4) 22.6 30.5
|
|
||||||
axiom (4) 72 H 0.000098
|
|
||||||
yescrypt (4) 3760 0.0051
|
|
||||||
scryptjane (4) 250 0.00034
|
|
||||||
pluck(2) (4) 1925 0.0026
|
|
||||||
drop(2) (4) 934 K 1.26
|
|
||||||
fresh(2) (4) 528 0.71
|
|
||||||
whirlpool(2) (4) 1290 1.74
|
|
||||||
whirlpoolx(2) (4) 5110 6.9
|
|
||||||
|
|
||||||
Footnotes:
|
|
||||||
(1) SSE2 rates are simulated in software (-march=core2) on an i7.
|
|
||||||
(2) Benchmark tested only
|
|
||||||
(3) CPU architecture not supported for algo. It won't work.
|
|
||||||
(4) AES_NI Optimization not available for CPU architecture. Uses SSE2, slower.
|
|
||||||
(5) Normalised profitability = algo profitability * norm rate, x11 = 1
|
|
||||||
(6) Not supported on Windows
|
|
||||||
|
|
235
RELEASE_NOTES
235
RELEASE_NOTES
@@ -1,81 +1,204 @@
|
|||||||
|
Change Log
|
||||||
|
----------
|
||||||
|
|
||||||
cpuminer-opt-3.1 release notes
|
v3.5.0
|
||||||
-------------------------------
|
|
||||||
|
|
||||||
cpuminer-opt combines the best of minerd (x11), cp3u (quark) and
|
Fixed blakecoin and vanilla increasing rejects with number of threads.
|
||||||
cpuminer-multi (multi-algo support plus non-kernel related
|
Removed support for SSE2 Groestl functions. SSE2 groestl remains available
|
||||||
enhancements). Additional credits to Lucas Jones, elmad, palmd,
|
in v3.4.12 and the legacy branch.
|
||||||
djm34, pooler, Jeff Garzik, Wolf0 and probably others.
|
It is no longer necessary to specify stratum+tcp:// in the url, it is assumed
|
||||||
|
and is the only supported protocol.
|
||||||
|
|
||||||
The core of cpuminer-opt remains cpuminer-multi and is the base for
|
v3.4.12
|
||||||
this fork.
|
|
||||||
|
|
||||||
All of the code is believed to be open and free. If anyone has a
|
lyra2z (zcoin) modified for blocks after 8192
|
||||||
claim to any of it post your case in the Bitcoin Talk forum,
|
fixed scryptjane to support various N factors
|
||||||
link below.
|
|
||||||
|
|
||||||
Features
|
v3.4.11
|
||||||
--------
|
|
||||||
|
|
||||||
V3.1 introduces a new mining engine called algo_gate. This feature
|
groestl algo AES optimized +200%
|
||||||
is not visible to the users except for the additional 5% performance
|
myr-gr algo AES optimized +100%
|
||||||
increase in all algos. This feature is of interest mostly to
|
|
||||||
developers.
|
|
||||||
|
|
||||||
cpuminer provides accelerated hashing on AES-NI capable CPUs in
|
v3.4.10
|
||||||
x11, x13, x14, x15, quark & qubit algorithms. It also currently
|
|
||||||
provides acceleration for SSE2 capable CPUs on quark and qubit
|
|
||||||
algorithms only. Other algorithms are available but unchanged from
|
|
||||||
cpuminer-multi-1.2pre and in various states of functionality.
|
|
||||||
V3.0 provides improved hash rates for many algos. See the
|
|
||||||
release announcement for details.
|
|
||||||
|
|
||||||
Requirements
|
xevan AES optimized +35%
|
||||||
------------
|
|
||||||
|
|
||||||
A 64 bit CPU with SSE2 support and any of the popular 64 bit
|
v3.4.9
|
||||||
Linux distributions. Standard development tools, libcurl-devel,
|
|
||||||
the preferred SSL development package of your distribution.
|
|
||||||
|
|
||||||
Limitations
|
fixed zr5, broken in v3.4.8
|
||||||
-----------
|
added xevan algo (Bitsend, BSD) with 10% improvement
|
||||||
|
added lyra2zoin (Zoin, ZOI) fully optimized but YMMV
|
||||||
|
|
||||||
v3.0 is source code only that can be compiled on Linux.
|
v3.4.8
|
||||||
Windows support is not yet available, but planned.
|
|
||||||
|
|
||||||
Compiling
|
added zcoin support, optimized for AVX2 but no increase in performance
|
||||||
---------
|
fixed API display of diff for cryptonight
|
||||||
|
--show-diff is now the default, use "--hide-diff" to disable
|
||||||
|
cleaned up some cpuminer-multi artifacts
|
||||||
|
|
||||||
After unpacking the tarball change into the cpuminer directory and
|
v3.4.7
|
||||||
execute these commands. Note that O3 is actually the upper case
|
|
||||||
letter O.
|
|
||||||
|
|
||||||
./autogen.sh
|
fixed benchmark, except for x11evo
|
||||||
./configure CFLAGS="-O3 -march=native" --with-crypto --with-curl
|
added CPU temperature to share submission report (Linux only)
|
||||||
|
|
||||||
|
v3.4.6
|
||||||
|
|
||||||
|
For users:
|
||||||
|
- cryptolight algo is now supported with AES optimizations
|
||||||
|
- display format changed for share submissions
|
||||||
|
- colour keyed "Accepted" or "Rejected" status.
|
||||||
|
- reject count and rate displayed when share is rejected.
|
||||||
|
|
||||||
|
For developers:
|
||||||
|
|
||||||
|
- code restructuring for detecting new work
|
||||||
|
- cleaned up detection and handling of new work
|
||||||
|
- removed call to stratum_gen_work from miner_thread.
|
||||||
|
- eliminated gen_work_now gate function.
|
||||||
|
- renamed gate function init_nonce to get_new_work.
|
||||||
|
- renamed gate function alloc_scratchbuf to miner_thread_init,
|
||||||
|
removed all scratchbuf references from miner_thread and moved
|
||||||
|
implementation to the local algo files of those algos that need it.
|
||||||
|
- moved most gate targets from algo-gate.c to cpu-miner.c removing
|
||||||
|
most mining related code from algo-gate-api.c.
|
||||||
|
|
||||||
|
v3.4.5
|
||||||
|
|
||||||
|
fixed stale share rejects mining cryptonight at Nicehash
|
||||||
|
fixed compile error on Westmere CPUs
|
||||||
|
|
||||||
|
v3.4.4
|
||||||
|
|
||||||
|
fixed compile errors on Westmere CPUs, this is an interim fix that
|
||||||
|
will compile without AES on Westmere
|
||||||
|
added support for cryptonight at Nicehash, some rejects may be produced
|
||||||
|
at Nicehash only.
|
||||||
|
|
||||||
|
v3.4.3
|
||||||
|
|
||||||
|
imported optimized m7m, +42%
|
||||||
|
|
||||||
|
v3.4.2
|
||||||
|
|
||||||
|
added veltor algo
|
||||||
|
tweaked lyra2 AVX/AVX2 code for small improvement.
|
||||||
|
|
||||||
|
v3.4.1
|
||||||
|
|
||||||
|
big AVX2 optimizations for lyra2 +35%, lyra2v2 +11%, AVX also faster
|
||||||
|
fixed hmq1725
|
||||||
|
|
||||||
|
v3.4.0
|
||||||
|
|
||||||
|
fixed Windows compile error introduced in v3.3.9
|
||||||
|
fixed x11gost, broken in v3.3.7
|
||||||
|
AVX2 optimizations improving many algos:
|
||||||
|
- Lyra2RE +3%
|
||||||
|
- Lyra2REv2 +19%
|
||||||
|
- x11gost (sib) +6%
|
||||||
|
- x11evo +2.4%
|
||||||
|
- c11 +6.9%
|
||||||
|
- x11 +5%
|
||||||
|
- x13 +5%
|
||||||
|
- x14 +3.6%
|
||||||
|
- x15 +2.4%
|
||||||
|
- x17 +2.8%
|
||||||
|
- qubit +8.4%
|
||||||
|
|
||||||
|
|
||||||
|
Compile Instructions
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Building on linux prerequisites:
|
||||||
|
|
||||||
|
It is assumed users know how to install packages on their system and
|
||||||
|
be able to compile standard source packages. This is basic Linux and
|
||||||
|
beyond the scope of cpuminer-opt.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Make sure you have the basic development packages installed.
|
||||||
|
Here is a good start:
|
||||||
|
|
||||||
|
http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu
|
||||||
|
|
||||||
|
Install any additional dependencies needed by cpuminer-opt. The list below
|
||||||
|
are some of the ones that may not be in the default install and need to
|
||||||
|
be installed manually. There may be others, read the error messages they
|
||||||
|
will give a clue as to the missing package.
|
||||||
|
|
||||||
|
The following command should install everything you need on Debian based
|
||||||
|
packages:
|
||||||
|
|
||||||
|
sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev automake
|
||||||
|
|
||||||
|
Building on Linux, see below for Windows.
|
||||||
|
|
||||||
|
Dependencies
|
||||||
|
|
||||||
|
build-essential (for Ubuntu, Development Tools package group on Fedora)
|
||||||
|
automake
|
||||||
|
libjansson-dev
|
||||||
|
libgmp-dev
|
||||||
|
libcurl4-openssl-dev
|
||||||
|
libssl-dev
|
||||||
|
pthreads
|
||||||
|
zlib
|
||||||
|
|
||||||
|
tar xvzf [file.tar.gz]
|
||||||
|
cd [file]
|
||||||
|
|
||||||
|
Run build.sh to build on Linux or execute the following commands.
|
||||||
|
|
||||||
|
./autogen.sh
|
||||||
|
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||||
make
|
make
|
||||||
|
|
||||||
libcurl-devel and an development packages are required to be
|
Start mining.
|
||||||
installed to build this application and are available in most
|
|
||||||
Linux repositories.
|
|
||||||
|
|
||||||
To compile on older CPUs without AES_NI support use the following
|
./cpuminer -a algo ...
|
||||||
CFLAGS options: "-O3 -march=native -DNO_AES_NI"
|
|
||||||
|
|
||||||
Bugs
|
Building on Windows prerequisites:
|
||||||
----
|
|
||||||
|
|
||||||
Users are encouraged to post their bug reports on the Bitcoin Talk
|
msys
|
||||||
forum at:
|
mingw_w64
|
||||||
|
Visual C++ redistributable 2008 X64
|
||||||
|
openssl, not sure about this
|
||||||
|
|
||||||
https://bitcointalk.org/index.php?topic=1326803.0
|
Install msys and mingw_w64, only needed once.
|
||||||
|
|
||||||
Donations
|
Unpack msys into C:\msys or your preferred directory.
|
||||||
---------
|
|
||||||
|
|
||||||
I do not do this for money but I have a donation address if users
|
Install mingw_w64 from win-builds.
|
||||||
are so inclined.
|
Follow instructions, check "msys or cygwin" and "x86_64" and accept default
|
||||||
|
existing msys installation.
|
||||||
|
|
||||||
bitcoin:12tdvfF7KmAsihBXQXynT6E6th2c2pByTT?label=donations
|
Open a msys shell by double clicking on msys.bat.
|
||||||
|
Note that msys shell uses linux syntax for file specifications, "C:\" is
|
||||||
|
mounted at "/c/".
|
||||||
|
|
||||||
Happy mining!
|
Add mingw bin directory to PATH variable
|
||||||
|
PATH="/c/msys/opt/windows_64/bin/:$PATH"
|
||||||
|
|
||||||
|
Installation complete, compile cpuminer-opt
|
||||||
|
|
||||||
|
Unpack cpuminer-opt source files using tar from msys shell, or using 7zip
|
||||||
|
or similar Windows program.
|
||||||
|
|
||||||
|
In msys shell cd to miner directory.
|
||||||
|
cd /c/path/to/cpuminer-opt
|
||||||
|
|
||||||
|
Run winbuild.sh to build on Windows or execute the following commands.
|
||||||
|
|
||||||
|
./autogen.sh
|
||||||
|
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
||||||
|
make
|
||||||
|
|
||||||
|
The following tips may be useful for older AMD CPUs.
|
||||||
|
|
||||||
|
Some users with AMD CPUs without AES_NI have reported problems compiling
|
||||||
|
with build.sh or "-march=native". Problems have included compile errors
|
||||||
|
and poor performance. These users are recommended to compile manually
|
||||||
|
specifying "-march=btver1" on the configure command line.
|
||||||
|
|
||||||
|
Support for even older x86_64 without AES_NI or SSE2 is not available.
|
||||||
|
@@ -12,40 +12,36 @@ void blakecoin_close(void *cc, void *dst);
|
|||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
#include <openssl/sha.h>
|
#include <openssl/sha.h>
|
||||||
|
|
||||||
/* Move init out of loop, so init once externally,
|
// context management is staged for efficiency.
|
||||||
* and then use one single memcpy */
|
// 1. global initial ctx cached on startup
|
||||||
static sph_blake256_context blake_mid;
|
// 2. per-thread midstate ctx cache refreshed every scan
|
||||||
static bool ctx_midstate_done = false;
|
// 3. local ctx for final hash calculation
|
||||||
|
|
||||||
static void init_blake_hash(void)
|
static sph_blake256_context blake_init_ctx;
|
||||||
|
static __thread sph_blake256_context blake_mid_ctx;
|
||||||
|
|
||||||
|
static void blake_midstate_init( const void* input )
|
||||||
{
|
{
|
||||||
blakecoin_init(&blake_mid);
|
// copy cached initial state
|
||||||
ctx_midstate_done = true;
|
memcpy( &blake_mid_ctx, &blake_init_ctx, sizeof blake_mid_ctx );
|
||||||
|
blakecoin( &blake_mid_ctx, input, 64 );
|
||||||
}
|
}
|
||||||
|
|
||||||
void blakecoinhash(void *state, const void *input)
|
void blakecoinhash( void *state, const void *input )
|
||||||
{
|
{
|
||||||
sph_blake256_context ctx;
|
sph_blake256_context ctx;
|
||||||
|
|
||||||
uint8_t hash[64];
|
uint8_t hash[64];
|
||||||
uint8_t *ending = (uint8_t*) input;
|
uint8_t *ending = (uint8_t*) input + 64;
|
||||||
ending += 64;
|
|
||||||
|
|
||||||
// do one memcopy to get a fresh context
|
// copy cached midstate
|
||||||
if (!ctx_midstate_done) {
|
memcpy( &ctx, &blake_mid_ctx, sizeof ctx );
|
||||||
init_blake_hash();
|
blakecoin( &ctx, ending, 16 );
|
||||||
blakecoin(&blake_mid, input, 64);
|
blakecoin_close( &ctx, hash );
|
||||||
}
|
memcpy( state, hash, 32 );
|
||||||
memcpy(&ctx, &blake_mid, sizeof(blake_mid));
|
|
||||||
|
|
||||||
blakecoin(&ctx, ending, 16);
|
|
||||||
blakecoin_close(&ctx, hash);
|
|
||||||
|
|
||||||
memcpy(state, hash, 32);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int scanhash_blakecoin(int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done)
|
uint64_t *hashes_done )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
@@ -57,16 +53,14 @@ int scanhash_blakecoin(int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
uint32_t n = first_nonce;
|
uint32_t n = first_nonce;
|
||||||
|
|
||||||
ctx_midstate_done = false;
|
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
HTarget = 0x7f;
|
HTarget = 0x7f;
|
||||||
|
|
||||||
// we need big endian data...
|
// we need big endian data...
|
||||||
// be32enc_array( endiandata, pdata, 19 );
|
|
||||||
for (int kk=0; kk < 19; kk++)
|
for (int kk=0; kk < 19; kk++)
|
||||||
be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]);
|
be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]);
|
||||||
|
|
||||||
|
blake_midstate_init( endiandata );
|
||||||
|
|
||||||
#ifdef DEBUG_ALGO
|
#ifdef DEBUG_ALGO
|
||||||
applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]);
|
applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]);
|
||||||
@@ -117,6 +111,7 @@ bool register_vanilla_algo( algo_gate_t* gate )
|
|||||||
gate->hash = (void*)&blakecoinhash;
|
gate->hash = (void*)&blakecoinhash;
|
||||||
gate->hash_alt = (void*)&blakecoinhash;
|
gate->hash_alt = (void*)&blakecoinhash;
|
||||||
gate->get_max64 = (void*)&blakecoin_get_max64;
|
gate->get_max64 = (void*)&blakecoin_get_max64;
|
||||||
|
blakecoin_init( &blake_init_ctx );
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -317,7 +317,6 @@ static const sph_u64 blkIV512[8] = {
|
|||||||
|
|
||||||
|
|
||||||
#define COMPRESS64 do { \
|
#define COMPRESS64 do { \
|
||||||
int r; \
|
|
||||||
int b=0; \
|
int b=0; \
|
||||||
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; \
|
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||||
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; \
|
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||||
|
@@ -1,133 +0,0 @@
|
|||||||
/*
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
|
|
||||||
|
|
||||||
LICENSE TERMS
|
|
||||||
|
|
||||||
The redistribution and use of this software (with or without changes)
|
|
||||||
is allowed without the payment of fees or royalties provided that:
|
|
||||||
|
|
||||||
1. source code distributions include the above copyright notice, this
|
|
||||||
list of conditions and the following disclaimer;
|
|
||||||
|
|
||||||
2. binary distributions include the above copyright notice, this list
|
|
||||||
of conditions and the following disclaimer in their documentation;
|
|
||||||
|
|
||||||
3. the name of the copyright holder is not used to endorse products
|
|
||||||
built using this software without specific written permission.
|
|
||||||
|
|
||||||
DISCLAIMER
|
|
||||||
|
|
||||||
This software is provided 'as is' with no explicit or implied warranties
|
|
||||||
in respect of its properties, including, but not limited to, correctness
|
|
||||||
and/or fitness for purpose.
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
Issue Date: 20/12/2007
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _BRG_ENDIAN_H
|
|
||||||
#define _BRG_ENDIAN_H
|
|
||||||
|
|
||||||
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
|
|
||||||
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
|
|
||||||
|
|
||||||
/* Include files where endian defines and byteswap functions may reside */
|
|
||||||
#if defined( __sun )
|
|
||||||
# include <sys/isa_defs.h>
|
|
||||||
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
|
|
||||||
# include <sys/endian.h>
|
|
||||||
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
|
|
||||||
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
|
|
||||||
# include <machine/endian.h>
|
|
||||||
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
|
|
||||||
# if !defined( __MINGW32__ ) && !defined( _AIX )
|
|
||||||
# include <endian.h>
|
|
||||||
# if !defined( __BEOS__ )
|
|
||||||
# include <byteswap.h>
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Now attempt to set the define for platform byte order using any */
|
|
||||||
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
|
|
||||||
/* seem to encompass most endian symbol definitions */
|
|
||||||
|
|
||||||
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
|
|
||||||
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
# endif
|
|
||||||
#elif defined( BIG_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
#elif defined( LITTLE_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
|
|
||||||
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
# endif
|
|
||||||
#elif defined( _BIG_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
#elif defined( _LITTLE_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
|
|
||||||
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
# endif
|
|
||||||
#elif defined( __BIG_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
#elif defined( __LITTLE_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
|
|
||||||
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
# endif
|
|
||||||
#elif defined( __BIG_ENDIAN__ )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
#elif defined( __LITTLE_ENDIAN__ )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* if the platform byte order could not be determined, then try to */
|
|
||||||
/* set this define using common machine defines */
|
|
||||||
#if !defined(PLATFORM_BYTE_ORDER)
|
|
||||||
|
|
||||||
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
|
|
||||||
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
|
|
||||||
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
|
|
||||||
defined( vax ) || defined( vms ) || defined( VMS ) || \
|
|
||||||
defined( __VMS ) || defined( _M_X64 )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
|
|
||||||
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
|
|
||||||
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
|
|
||||||
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
|
|
||||||
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
|
|
||||||
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
|
|
||||||
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
|
|
||||||
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
|
|
||||||
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
#else
|
|
||||||
# error Please edit lines 126 or 128 in brg_endian.h to set the platform byte order
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
@@ -1,231 +0,0 @@
|
|||||||
/*
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
|
|
||||||
|
|
||||||
(a few lines added by Soeren S. Thomsen, October 2008)
|
|
||||||
|
|
||||||
LICENSE TERMS
|
|
||||||
|
|
||||||
The redistribution and use of this software (with or without changes)
|
|
||||||
is allowed without the payment of fees or royalties provided that:
|
|
||||||
|
|
||||||
1. source code distributions include the above copyright notice, this
|
|
||||||
list of conditions and the following disclaimer;
|
|
||||||
|
|
||||||
2. binary distributions include the above copyright notice, this list
|
|
||||||
of conditions and the following disclaimer in their documentation;
|
|
||||||
|
|
||||||
3. the name of the copyright holder is not used to endorse products
|
|
||||||
built using this software without specific written permission.
|
|
||||||
|
|
||||||
DISCLAIMER
|
|
||||||
|
|
||||||
This software is provided 'as is' with no explicit or implied warranties
|
|
||||||
in respect of its properties, including, but not limited to, correctness
|
|
||||||
and/or fitness for purpose.
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
Issue Date: 20/12/2007
|
|
||||||
|
|
||||||
The unsigned integer types defined here are of the form uint_<nn>t where
|
|
||||||
<nn> is the length of the type; for example, the unsigned 32-bit type is
|
|
||||||
'uint_32t'. These are NOT the same as the 'C99 integer types' that are
|
|
||||||
defined in the inttypes.h and stdint.h headers since attempts to use these
|
|
||||||
types have shown that support for them is still highly variable. However,
|
|
||||||
since the latter are of the form uint<nn>_t, a regular expression search
|
|
||||||
and replace (in VC++ search on 'uint_{:z}t' and replace with 'uint\1_t')
|
|
||||||
can be used to convert the types used here to the C99 standard types.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _BRG_TYPES_H
|
|
||||||
#define _BRG_TYPES_H
|
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <limits.h>
|
|
||||||
|
|
||||||
#if defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
|
|
||||||
# include <stddef.h>
|
|
||||||
# define ptrint_t intptr_t
|
|
||||||
#elif defined( __GNUC__ ) && ( __GNUC__ >= 3 )
|
|
||||||
# include <stdint.h>
|
|
||||||
# define ptrint_t intptr_t
|
|
||||||
#else
|
|
||||||
# define ptrint_t int
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef BRG_UI8
|
|
||||||
# define BRG_UI8
|
|
||||||
# if UCHAR_MAX == 255u
|
|
||||||
typedef unsigned char uint_8t;
|
|
||||||
# else
|
|
||||||
# error Please define uint_8t as an 8-bit unsigned integer type in brg_types.h
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef BRG_UI16
|
|
||||||
# define BRG_UI16
|
|
||||||
# if USHRT_MAX == 65535u
|
|
||||||
typedef unsigned short uint_16t;
|
|
||||||
# else
|
|
||||||
# error Please define uint_16t as a 16-bit unsigned short type in brg_types.h
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef BRG_UI32
|
|
||||||
# define BRG_UI32
|
|
||||||
# if UINT_MAX == 4294967295u
|
|
||||||
# define li_32(h) 0x##h##u
|
|
||||||
typedef unsigned int uint_32t;
|
|
||||||
# elif ULONG_MAX == 4294967295u
|
|
||||||
# define li_32(h) 0x##h##ul
|
|
||||||
typedef unsigned long uint_32t;
|
|
||||||
# elif defined( _CRAY )
|
|
||||||
# error This code needs 32-bit data types, which Cray machines do not provide
|
|
||||||
# else
|
|
||||||
# error Please define uint_32t as a 32-bit unsigned integer type in brg_types.h
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef BRG_UI64
|
|
||||||
# if defined( __BORLANDC__ ) && !defined( __MSDOS__ )
|
|
||||||
# define BRG_UI64
|
|
||||||
# define li_64(h) 0x##h##ui64
|
|
||||||
typedef unsigned __int64 uint_64t;
|
|
||||||
# elif defined( _MSC_VER ) && ( _MSC_VER < 1300 ) /* 1300 == VC++ 7.0 */
|
|
||||||
# define BRG_UI64
|
|
||||||
# define li_64(h) 0x##h##ui64
|
|
||||||
typedef unsigned __int64 uint_64t;
|
|
||||||
# elif defined( __sun ) && defined( ULONG_MAX ) && ULONG_MAX == 0xfffffffful
|
|
||||||
# define BRG_UI64
|
|
||||||
# define li_64(h) 0x##h##ull
|
|
||||||
typedef unsigned long long uint_64t;
|
|
||||||
# elif defined( __MVS__ )
|
|
||||||
# define BRG_UI64
|
|
||||||
# define li_64(h) 0x##h##ull
|
|
||||||
typedef unsigned int long long uint_64t;
|
|
||||||
# elif defined( UINT_MAX ) && UINT_MAX > 4294967295u
|
|
||||||
# if UINT_MAX == 18446744073709551615u
|
|
||||||
# define BRG_UI64
|
|
||||||
# define li_64(h) 0x##h##u
|
|
||||||
typedef unsigned int uint_64t;
|
|
||||||
# endif
|
|
||||||
# elif defined( ULONG_MAX ) && ULONG_MAX > 4294967295u
|
|
||||||
# if ULONG_MAX == 18446744073709551615ul
|
|
||||||
# define BRG_UI64
|
|
||||||
# define li_64(h) 0x##h##ul
|
|
||||||
typedef unsigned long uint_64t;
|
|
||||||
# endif
|
|
||||||
# elif defined( ULLONG_MAX ) && ULLONG_MAX > 4294967295u
|
|
||||||
# if ULLONG_MAX == 18446744073709551615ull
|
|
||||||
# define BRG_UI64
|
|
||||||
# define li_64(h) 0x##h##ull
|
|
||||||
typedef unsigned long long uint_64t;
|
|
||||||
# endif
|
|
||||||
# elif defined( ULONG_LONG_MAX ) && ULONG_LONG_MAX > 4294967295u
|
|
||||||
# if ULONG_LONG_MAX == 18446744073709551615ull
|
|
||||||
# define BRG_UI64
|
|
||||||
# define li_64(h) 0x##h##ull
|
|
||||||
typedef unsigned long long uint_64t;
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined( BRG_UI64 )
|
|
||||||
# if defined( NEED_UINT_64T )
|
|
||||||
# error Please define uint_64t as an unsigned 64 bit type in brg_types.h
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef RETURN_VALUES
|
|
||||||
# define RETURN_VALUES
|
|
||||||
# if defined( DLL_EXPORT )
|
|
||||||
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
|
|
||||||
# define VOID_RETURN __declspec( dllexport ) void __stdcall
|
|
||||||
# define INT_RETURN __declspec( dllexport ) int __stdcall
|
|
||||||
# elif defined( __GNUC__ )
|
|
||||||
# define VOID_RETURN __declspec( __dllexport__ ) void
|
|
||||||
# define INT_RETURN __declspec( __dllexport__ ) int
|
|
||||||
# else
|
|
||||||
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
|
|
||||||
# endif
|
|
||||||
# elif defined( DLL_IMPORT )
|
|
||||||
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
|
|
||||||
# define VOID_RETURN __declspec( dllimport ) void __stdcall
|
|
||||||
# define INT_RETURN __declspec( dllimport ) int __stdcall
|
|
||||||
# elif defined( __GNUC__ )
|
|
||||||
# define VOID_RETURN __declspec( __dllimport__ ) void
|
|
||||||
# define INT_RETURN __declspec( __dllimport__ ) int
|
|
||||||
# else
|
|
||||||
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
|
|
||||||
# endif
|
|
||||||
# elif defined( __WATCOMC__ )
|
|
||||||
# define VOID_RETURN void __cdecl
|
|
||||||
# define INT_RETURN int __cdecl
|
|
||||||
# else
|
|
||||||
# define VOID_RETURN void
|
|
||||||
# define INT_RETURN int
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* These defines are used to detect and set the memory alignment of pointers.
|
|
||||||
Note that offsets are in bytes.
|
|
||||||
|
|
||||||
ALIGN_OFFSET(x,n) return the positive or zero offset of
|
|
||||||
the memory addressed by the pointer 'x'
|
|
||||||
from an address that is aligned on an
|
|
||||||
'n' byte boundary ('n' is a power of 2)
|
|
||||||
|
|
||||||
ALIGN_FLOOR(x,n) return a pointer that points to memory
|
|
||||||
that is aligned on an 'n' byte boundary
|
|
||||||
and is not higher than the memory address
|
|
||||||
pointed to by 'x' ('n' is a power of 2)
|
|
||||||
|
|
||||||
ALIGN_CEIL(x,n) return a pointer that points to memory
|
|
||||||
that is aligned on an 'n' byte boundary
|
|
||||||
and is not lower than the memory address
|
|
||||||
pointed to by 'x' ('n' is a power of 2)
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define ALIGN_OFFSET(x,n) (((ptrint_t)(x)) & ((n) - 1))
|
|
||||||
#define ALIGN_FLOOR(x,n) ((uint_8t*)(x) - ( ((ptrint_t)(x)) & ((n) - 1)))
|
|
||||||
#define ALIGN_CEIL(x,n) ((uint_8t*)(x) + (-((ptrint_t)(x)) & ((n) - 1)))
|
|
||||||
|
|
||||||
/* These defines are used to declare buffers in a way that allows
|
|
||||||
faster operations on longer variables to be used. In all these
|
|
||||||
defines 'size' must be a power of 2 and >= 8. NOTE that the
|
|
||||||
buffer size is in bytes but the type length is in bits
|
|
||||||
|
|
||||||
UNIT_TYPEDEF(x,size) declares a variable 'x' of length
|
|
||||||
'size' bits
|
|
||||||
|
|
||||||
BUFR_TYPEDEF(x,size,bsize) declares a buffer 'x' of length 'bsize'
|
|
||||||
bytes defined as an array of variables
|
|
||||||
each of 'size' bits (bsize must be a
|
|
||||||
multiple of size / 8)
|
|
||||||
|
|
||||||
UNIT_CAST(x,size) casts a variable to a type of
|
|
||||||
length 'size' bits
|
|
||||||
|
|
||||||
UPTR_CAST(x,size) casts a pointer to a pointer to a
|
|
||||||
variable of length 'size' bits
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define UI_TYPE(size) uint_##size##t
|
|
||||||
#define UNIT_TYPEDEF(x,size) typedef UI_TYPE(size) x
|
|
||||||
/* 'bsize' and 'size' are parenthesized so that expression arguments
   (e.g. a sum passed as 'bsize') keep their intended grouping when the
   element count bsize / (size / 8) is computed. */
#define BUFR_TYPEDEF(x,size,bsize) typedef UI_TYPE(size) x[(bsize) / ((size) >> 3)]
|
|
||||||
#define UNIT_CAST(x,size) ((UI_TYPE(size) )(x))
|
|
||||||
#define UPTR_CAST(x,size) ((UI_TYPE(size)*)(x))
|
|
||||||
|
|
||||||
/* Added by Soeren S. Thomsen (begin) */
|
|
||||||
#define u8 uint_8t
|
|
||||||
#define u32 uint_32t
|
|
||||||
#define u64 uint_64t
|
|
||||||
/* (end) */
|
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
File diff suppressed because it is too large
Load Diff
@@ -1,956 +0,0 @@
|
|||||||
/* groestl-intr-vperm.h Aug 2011
|
|
||||||
*
|
|
||||||
* Groestl implementation with intrinsics using ssse3 instructions.
|
|
||||||
* Author: Günther A. Roland, Martin Schläffer
|
|
||||||
*
|
|
||||||
* Based on the vperm and aes_ni implementations of the hash function Groestl
|
|
||||||
* by Cagdas Calik <ccalik@metu.edu.tr> http://www.metu.edu.tr/~ccalik/
|
|
||||||
* Institute of Applied Mathematics, Middle East Technical University, Turkey
|
|
||||||
*
|
|
||||||
* This code is placed in the public domain
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <tmmintrin.h>
|
|
||||||
#include "grsi.h"
|
|
||||||
|
|
||||||
/*define data alignment for different C compilers*/
|
|
||||||
#if defined(__GNUC__)
|
|
||||||
#define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
|
|
||||||
#else
|
|
||||||
#define DATA_ALIGN16(x) __declspec(align(16)) x
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//#if defined(DECLARE_GLOBAL)
|
|
||||||
#if 1
|
|
||||||
#define GLOBAL
|
|
||||||
#else
|
|
||||||
#define GLOBAL extern
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//#if defined(DECLARE_IFUN)
|
|
||||||
#if 1
|
|
||||||
#define IFUN
|
|
||||||
#else
|
|
||||||
#define IFUN extern
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* global constants */
|
|
||||||
//GLOBAL __m128i grsiROUND_CONST_Lx;
|
|
||||||
//GLOBAL __m128i grsiROUND_CONST_L0[grsiROUNDS512];
|
|
||||||
//GLOBAL __m128i grsiROUND_CONST_L7[grsiROUNDS512];
|
|
||||||
DATA_ALIGN16(int32_t grsiSUBSH_MASK_short[8*4]) = {
|
|
||||||
0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
|
|
||||||
0x04030201, 0x08070605, 0x0c0b0a09, 0x000f0e0d,
|
|
||||||
0x05040302, 0x09080706, 0x0d0c0b0a, 0x01000f0e,
|
|
||||||
0x06050403, 0x0a090807, 0x0e0d0c0b, 0x0201000f,
|
|
||||||
0x07060504, 0x0b0a0908, 0x0f0e0d0c, 0x03020100,
|
|
||||||
0x08070605, 0x0c0b0a09, 0x000f0e0d, 0x04030201,
|
|
||||||
0x09080706, 0x0d0c0b0a, 0x01000f0e, 0x05040302,
|
|
||||||
0x0e0d0c0b, 0x0201000f, 0x06050403, 0x0a090807
|
|
||||||
};
|
|
||||||
/* View of the grsiSUBSH_MASK_short table (declared with DATA_ALIGN16) as an
   array of __m128i shuffle masks, four 32-bit words per mask.  The explicit
   cast is required: int32_t* and __m128i* are incompatible pointer types. */
GLOBAL __m128i *grsiSUBSH_MASK = (__m128i *)grsiSUBSH_MASK_short;
|
|
||||||
/* Byte-pattern constants, statically initialized with four 32-bit values.
 * NOTE(review): how a four-element brace initializer maps onto __m128i is
 * compiler-dependent (with GCC/Clang __m128i is presumably a vector of two
 * 64-bit lanes, so this likely does not yield the repeated byte pattern) --
 * confirm before relying on these initial values.
 * In this file grsiALL_0F is reassigned by grsiSET_SHARED_CONSTANTS() and
 * grsiALL_FF by grsiSET_CONSTANTS(); the runtime assignment of grsiALL_1B
 * is commented out. */
GLOBAL __m128i grsiALL_0F = {0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f};
|
|
||||||
GLOBAL __m128i grsiALL_1B = {0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b};
|
|
||||||
GLOBAL __m128i grsiALL_FF = {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff};
|
|
||||||
|
|
||||||
/* global unknown */
|
|
||||||
|
|
||||||
|
|
||||||
GLOBAL __m128i grsiVPERM_OPT[2];
|
|
||||||
GLOBAL __m128i grsiVPERM_INV[2];
|
|
||||||
GLOBAL __m128i grsiVPERM_SB1[2];
|
|
||||||
GLOBAL __m128i grsiVPERM_SB2[2];
|
|
||||||
GLOBAL __m128i grsiVPERM_SB4[2];
|
|
||||||
GLOBAL __m128i grsiVPERM_SBO[2];
|
|
||||||
|
|
||||||
/* state vars */
|
|
||||||
GLOBAL __m128i grsiTRANSP_MASK;
|
|
||||||
GLOBAL __m128i grsiVPERM_IPT[2];
|
|
||||||
GLOBAL __m128i grsiALL_15;
|
|
||||||
GLOBAL __m128i grsiALL_63;
|
|
||||||
GLOBAL __m128i grsiROUND_CONST_P[grsiROUNDS1024];
|
|
||||||
GLOBAL __m128i grsiROUND_CONST_Q[grsiROUNDS1024];
|
|
||||||
|
|
||||||
#define grsitos(a) #a
|
|
||||||
#define grsitostr(a) grsitos(a)
|
|
||||||
|
|
||||||
/*
|
|
||||||
grsiALL_1B = _mm_set_epi32(0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b);\
|
|
||||||
grsiALL_63 = _mm_set_epi32(0x63636363, 0x63636363, 0x63636363, 0x63636363);\
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define grsiSET_SHARED_CONSTANTS(){\
|
|
||||||
grsiTRANSP_MASK = _mm_set_epi32(0x0f070b03, 0x0e060a02, 0x0d050901, 0x0c040800);\
|
|
||||||
grsiALL_0F = _mm_set_epi32(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f);\
|
|
||||||
grsiALL_15 = _mm_set_epi32(0x15151515, 0x15151515, 0x15151515, 0x15151515);\
|
|
||||||
\
|
|
||||||
grsiVPERM_IPT[0] = _mm_set_epi32(0xCD80B1FC, 0xB0FDCC81, 0x4C01307D, 0x317C4D00);\
|
|
||||||
grsiVPERM_IPT[1] = _mm_set_epi32(0xCABAE090, 0x52227808, 0xC2B2E898, 0x5A2A7000);\
|
|
||||||
grsiVPERM_OPT[0] = _mm_set_epi32(0xE10D5DB1, 0xB05C0CE0, 0x01EDBD51, 0x50BCEC00);\
|
|
||||||
grsiVPERM_OPT[1] = _mm_set_epi32(0xF7974121, 0xDEBE6808, 0xFF9F4929, 0xD6B66000);\
|
|
||||||
grsiVPERM_INV[0] = _mm_set_epi32(0x030D0E0C, 0x02050809, 0x01040A06, 0x0F0B0780);\
|
|
||||||
grsiVPERM_INV[1] = _mm_set_epi32(0x04070309, 0x0A0B0C02, 0x0E05060F, 0x0D080180);\
|
|
||||||
grsiVPERM_SB1[0] = _mm_set_epi32(0x3BF7CCC1, 0x0D2ED9EF, 0x3618D415, 0xFAE22300);\
|
|
||||||
grsiVPERM_SB1[1] = _mm_set_epi32(0xA5DF7A6E, 0x142AF544, 0xB19BE18F, 0xCB503E00);\
|
|
||||||
grsiVPERM_SB2[0] = _mm_set_epi32(0xC2A163C8, 0xAB82234A, 0x69EB8840, 0x0AE12900);\
|
|
||||||
grsiVPERM_SB2[1] = _mm_set_epi32(0x5EB7E955, 0xBC982FCD, 0xE27A93C6, 0x0B712400);\
|
|
||||||
grsiVPERM_SB4[0] = _mm_set_epi32(0xBA44FE79, 0x876D2914, 0x3D50AED7, 0xC393EA00);\
|
|
||||||
grsiVPERM_SB4[1] = _mm_set_epi32(0xA876DE97, 0x49087E9F, 0xE1E937A0, 0x3FD64100);\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/* grsiVPERM
|
|
||||||
* Transform w/o settings c*
|
|
||||||
* transforms 2 rows to/from "vperm mode"
|
|
||||||
* this function is derived from:
|
|
||||||
* vperm and aes_ni implementations of hash function Grostl
|
|
||||||
* by Cagdas CALIK
|
|
||||||
* inputs:
|
|
||||||
* a0, a1 = 2 rows
|
|
||||||
* table = transformation table to use
|
|
||||||
* t*, c* = clobbers
|
|
||||||
* outputs:
|
|
||||||
* a0, a1 = 2 rows transformed with table
|
|
||||||
* */
|
|
||||||
#define grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2){\
|
|
||||||
t0 = c0;\
|
|
||||||
t1 = c0;\
|
|
||||||
t0 = _mm_andnot_si128(t0, a0);\
|
|
||||||
t1 = _mm_andnot_si128(t1, a1);\
|
|
||||||
t0 = _mm_srli_epi32(t0, 4);\
|
|
||||||
t1 = _mm_srli_epi32(t1, 4);\
|
|
||||||
a0 = _mm_and_si128(a0, c0);\
|
|
||||||
a1 = _mm_and_si128(a1, c0);\
|
|
||||||
t2 = c2;\
|
|
||||||
t3 = c2;\
|
|
||||||
t2 = _mm_shuffle_epi8(t2, a0);\
|
|
||||||
t3 = _mm_shuffle_epi8(t3, a1);\
|
|
||||||
a0 = c1;\
|
|
||||||
a1 = c1;\
|
|
||||||
a0 = _mm_shuffle_epi8(a0, t0);\
|
|
||||||
a1 = _mm_shuffle_epi8(a1, t1);\
|
|
||||||
a0 = _mm_xor_si128(a0, t2);\
|
|
||||||
a1 = _mm_xor_si128(a1, t3);\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/* grsiVPERM
 * Set Transform Constants
 * loads the three constants consumed by grsiVPERM_Transform_No_Const
 * inputs:
 * table = transformation table; it is cast to __m128i* and its
 *         elements [0] and [1] are read
 * outputs:
 * c0 = grsiALL_0F
 * c1 = table[0]
 * c2 = table[1]
 * */
#define grsiVPERM_Transform_Set_Const(table, c0, c1, c2){\
|
|
||||||
c0 = grsiALL_0F;\
|
|
||||||
c1 = ((__m128i*) table )[0];\
|
|
||||||
c2 = ((__m128i*) table )[1];\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/* grsiVPERM
|
|
||||||
* Transform
|
|
||||||
* transforms 2 rows to/from "vperm mode"
|
|
||||||
* this function is derived from:
|
|
||||||
* vperm and aes_ni implementations of hash function Grostl
|
|
||||||
* by Cagdas CALIK
|
|
||||||
* inputs:
|
|
||||||
* a0, a1 = 2 rows
|
|
||||||
* table = transformation table to use
|
|
||||||
* t*, c* = clobbers
|
|
||||||
* outputs:
|
|
||||||
* a0, a1 = 2 rows transformed with table
|
|
||||||
* */
|
|
||||||
#define grsiVPERM_Transform(a0, a1, table, t0, t1, t2, t3, c0, c1, c2){\
|
|
||||||
grsiVPERM_Transform_Set_Const(table, c0, c1, c2);\
|
|
||||||
grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2);\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/* grsiVPERM
|
|
||||||
* Transform State
|
|
||||||
* inputs:
|
|
||||||
* a0-a3 = state
|
|
||||||
* table = transformation table to use
|
|
||||||
* t* = clobbers
|
|
||||||
* outputs:
|
|
||||||
* a0-a3 = transformed state
|
|
||||||
* */
|
|
||||||
#define grsiVPERM_Transform_State(a0, a1, a2, a3, table, t0, t1, t2, t3, c0, c1, c2){\
|
|
||||||
grsiVPERM_Transform_Set_Const(table, c0, c1, c2);\
|
|
||||||
grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2);\
|
|
||||||
grsiVPERM_Transform_No_Const(a2, a3, t0, t1, t2, t3, c0, c1, c2);\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/* grsiVPERM
|
|
||||||
* Add Constant to State
|
|
||||||
* inputs:
|
|
||||||
* a0-a7 = state
|
|
||||||
* constant = constant to add
|
|
||||||
* t0 = clobber
|
|
||||||
* outputs:
|
|
||||||
* a0-a7 = state + constant
|
|
||||||
* */
|
|
||||||
#define grsiVPERM_Add_Constant(a0, a1, a2, a3, a4, a5, a6, a7, constant, t0){\
|
|
||||||
t0 = constant;\
|
|
||||||
a0 = _mm_xor_si128(a0, t0);\
|
|
||||||
a1 = _mm_xor_si128(a1, t0);\
|
|
||||||
a2 = _mm_xor_si128(a2, t0);\
|
|
||||||
a3 = _mm_xor_si128(a3, t0);\
|
|
||||||
a4 = _mm_xor_si128(a4, t0);\
|
|
||||||
a5 = _mm_xor_si128(a5, t0);\
|
|
||||||
a6 = _mm_xor_si128(a6, t0);\
|
|
||||||
a7 = _mm_xor_si128(a7, t0);\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/* grsiVPERM
|
|
||||||
* Set Substitute Core Constants
|
|
||||||
* */
|
|
||||||
#define grsiVPERM_Substitute_Core_Set_Const(c0, c1, c2){\
|
|
||||||
grsiVPERM_Transform_Set_Const(grsiVPERM_INV, c0, c1, c2);\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/* grsiVPERM
|
|
||||||
* Substitute Core
|
|
||||||
* first part of sbox inverse computation
|
|
||||||
* this function is derived from:
|
|
||||||
* vperm and aes_ni implementations of hash function Grostl
|
|
||||||
* by Cagdas CALIK
|
|
||||||
* inputs:
|
|
||||||
* a0 = 1 row
|
|
||||||
* t*, c* = clobbers
|
|
||||||
* outputs:
|
|
||||||
* b0a, b0b = inputs for lookup step
|
|
||||||
* */
|
|
||||||
#define grsiVPERM_Substitute_Core(a0, b0a, b0b, t0, t1, c0, c1, c2){\
|
|
||||||
t0 = c0;\
|
|
||||||
t0 = _mm_andnot_si128(t0, a0);\
|
|
||||||
t0 = _mm_srli_epi32(t0, 4);\
|
|
||||||
a0 = _mm_and_si128(a0, c0);\
|
|
||||||
b0a = c1;\
|
|
||||||
b0a = _mm_shuffle_epi8(b0a, a0);\
|
|
||||||
a0 = _mm_xor_si128(a0, t0);\
|
|
||||||
b0b = c2;\
|
|
||||||
b0b = _mm_shuffle_epi8(b0b, t0);\
|
|
||||||
b0b = _mm_xor_si128(b0b, b0a);\
|
|
||||||
t1 = c2;\
|
|
||||||
t1 = _mm_shuffle_epi8(t1, a0);\
|
|
||||||
t1 = _mm_xor_si128(t1, b0a);\
|
|
||||||
b0a = c2;\
|
|
||||||
b0a = _mm_shuffle_epi8(b0a, b0b);\
|
|
||||||
b0a = _mm_xor_si128(b0a, a0);\
|
|
||||||
b0b = c2;\
|
|
||||||
b0b = _mm_shuffle_epi8(b0b, t1);\
|
|
||||||
b0b = _mm_xor_si128(b0b, t0);\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/* grsiVPERM
|
|
||||||
* Lookup
|
|
||||||
* second part of sbox inverse computation
|
|
||||||
* this function is derived from:
|
|
||||||
* vperm and aes_ni implementations of hash function Grostl
|
|
||||||
* by Cagdas CALIK
|
|
||||||
* inputs:
|
|
||||||
* a0a, a0b = output of Substitution Core
|
|
||||||
* table = lookup table to use (*1 / *2 / *4)
|
|
||||||
* t0 = clobber
|
|
||||||
* outputs:
|
|
||||||
* b0 = output of sbox + multiplication
|
|
||||||
* */
|
|
||||||
#define grsiVPERM_Lookup(a0a, a0b, table, b0, t0){\
|
|
||||||
b0 = ((__m128i*) table )[0];\
|
|
||||||
t0 = ((__m128i*) table )[1];\
|
|
||||||
b0 = _mm_shuffle_epi8(b0, a0b);\
|
|
||||||
t0 = _mm_shuffle_epi8(t0, a0a);\
|
|
||||||
b0 = _mm_xor_si128(b0, t0);\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/* grsiVPERM
|
|
||||||
* SubBytes and *2 / *4
|
|
||||||
* this function is derived from:
|
|
||||||
* Constant-time SSSE3 AES core implementation
|
|
||||||
* by Mike Hamburg
|
|
||||||
* and
|
|
||||||
* vperm and aes_ni implementations of hash function Grostl
|
|
||||||
* by Cagdas CALIK
|
|
||||||
* inputs:
|
|
||||||
* a0-a7 = state
|
|
||||||
* t*, c* = clobbers
|
|
||||||
* outputs:
|
|
||||||
* a0-a7 = state * 4
|
|
||||||
* c2 = row0 * 2 -> b0
|
|
||||||
* c1 = row7 * 2 -> b3
|
|
||||||
* c0 = row0 * 1 -> b4
|
|
||||||
* t2 = row4 * 1 -> b7
|
|
||||||
* TEMP_MUL1 = row(i) * 1
|
|
||||||
* TEMP_MUL2 = row(i) * 2
|
|
||||||
*
|
|
||||||
* call:grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, b1, b2, b5, b6, b0, b3, b4, b7) */
|
|
||||||
#define grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t3, t4, c2, c1, c0, t2){\
|
|
||||||
/* set Constants */\
|
|
||||||
grsiVPERM_Substitute_Core_Set_Const(c0, c1, c2);\
|
|
||||||
/* row 1 */\
|
|
||||||
grsiVPERM_Substitute_Core(a1, t0, t1, t3, t4, c0, c1, c2);\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
|
|
||||||
TEMP_MUL1[1] = t2;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
|
|
||||||
TEMP_MUL2[1] = t3;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a1, t4);\
|
|
||||||
/* --- */\
|
|
||||||
/* row 2 */\
|
|
||||||
grsiVPERM_Substitute_Core(a2, t0, t1, t3, t4, c0, c1, c2);\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
|
|
||||||
TEMP_MUL1[2] = t2;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
|
|
||||||
TEMP_MUL2[2] = t3;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a2, t4);\
|
|
||||||
/* --- */\
|
|
||||||
/* row 3 */\
|
|
||||||
grsiVPERM_Substitute_Core(a3, t0, t1, t3, t4, c0, c1, c2);\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
|
|
||||||
TEMP_MUL1[3] = t2;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
|
|
||||||
TEMP_MUL2[3] = t3;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a3, t4);\
|
|
||||||
/* --- */\
|
|
||||||
/* row 5 */\
|
|
||||||
grsiVPERM_Substitute_Core(a5, t0, t1, t3, t4, c0, c1, c2);\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
|
|
||||||
TEMP_MUL1[5] = t2;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
|
|
||||||
TEMP_MUL2[5] = t3;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a5, t4);\
|
|
||||||
/* --- */\
|
|
||||||
/* row 6 */\
|
|
||||||
grsiVPERM_Substitute_Core(a6, t0, t1, t3, t4, c0, c1, c2);\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
|
|
||||||
TEMP_MUL1[6] = t2;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
|
|
||||||
TEMP_MUL2[6] = t3;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a6, t4);\
|
|
||||||
/* --- */\
|
|
||||||
/* row 7 */\
|
|
||||||
grsiVPERM_Substitute_Core(a7, t0, t1, t3, t4, c0, c1, c2);\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
|
|
||||||
TEMP_MUL1[7] = t2;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, c1, t4); /*c1 -> b3*/\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a7, t4);\
|
|
||||||
/* --- */\
|
|
||||||
/* row 4 */\
|
|
||||||
grsiVPERM_Substitute_Core(a4, t0, t1, t3, t4, c0, (grsiVPERM_INV[0]), c2);\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4); /*t2 -> b7*/\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
|
|
||||||
TEMP_MUL2[4] = t3;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a4, t4);\
|
|
||||||
/* --- */\
|
|
||||||
/* row 0 */\
|
|
||||||
grsiVPERM_Substitute_Core(a0, t0, t1, t3, t4, c0, (grsiVPERM_INV[0]), c2);\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, c0, t4); /*c0 -> b4*/\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, c2, t4); /*c2 -> b0*/\
|
|
||||||
TEMP_MUL2[0] = c2;\
|
|
||||||
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a0, t4);\
|
|
||||||
/* --- */\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
|
|
||||||
/* Optimized grsiMixBytes
|
|
||||||
* inputs:
|
|
||||||
* a0-a7 = (row0-row7) * 4
|
|
||||||
* b0 = row0 * 2
|
|
||||||
* b3 = row7 * 2
|
|
||||||
* b4 = row0 * 1
|
|
||||||
* b7 = row4 * 1
|
|
||||||
* all *1 and *2 values must also be in TEMP_MUL1, TEMP_MUL2
|
|
||||||
* output: b0-b7
|
|
||||||
* */
|
|
||||||
#define grsiMixBytes(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
|
|
||||||
/* save one value */\
|
|
||||||
TEMP_MUL4 = a3;\
|
|
||||||
/* 1 */\
|
|
||||||
b1 = a0;\
|
|
||||||
b1 = _mm_xor_si128(b1, a5);\
|
|
||||||
b1 = _mm_xor_si128(b1, b4); /* -> helper! */\
|
|
||||||
b1 = _mm_xor_si128(b1, (TEMP_MUL2[3]));\
|
|
||||||
b2 = b1;\
|
|
||||||
\
|
|
||||||
/* 2 */\
|
|
||||||
b5 = a1;\
|
|
||||||
b5 = _mm_xor_si128(b5, a4);\
|
|
||||||
b5 = _mm_xor_si128(b5, b7); /* -> helper! */\
|
|
||||||
b5 = _mm_xor_si128(b5, b3); /* -> helper! */\
|
|
||||||
b6 = b5;\
|
|
||||||
\
|
|
||||||
/* 4 */\
|
|
||||||
b7 = _mm_xor_si128(b7, a6);\
|
|
||||||
/*b7 = _mm_xor_si128(b7, (TEMP_MUL1[4])); -> helper! */\
|
|
||||||
b7 = _mm_xor_si128(b7, (TEMP_MUL1[6]));\
|
|
||||||
b7 = _mm_xor_si128(b7, (TEMP_MUL2[1]));\
|
|
||||||
b7 = _mm_xor_si128(b7, b3); /* -> helper! */\
|
|
||||||
b2 = _mm_xor_si128(b2, b7);\
|
|
||||||
\
|
|
||||||
/* 3 */\
|
|
||||||
b0 = _mm_xor_si128(b0, a7);\
|
|
||||||
b0 = _mm_xor_si128(b0, (TEMP_MUL1[5]));\
|
|
||||||
b0 = _mm_xor_si128(b0, (TEMP_MUL1[7]));\
|
|
||||||
/*b0 = _mm_xor_si128(b0, (TEMP_MUL2[0])); -> helper! */\
|
|
||||||
b0 = _mm_xor_si128(b0, (TEMP_MUL2[2]));\
|
|
||||||
b3 = b0;\
|
|
||||||
b1 = _mm_xor_si128(b1, b0);\
|
|
||||||
b0 = _mm_xor_si128(b0, b7); /* moved from 4 */\
|
|
||||||
\
|
|
||||||
/* 5 */\
|
|
||||||
b4 = _mm_xor_si128(b4, a2);\
|
|
||||||
/*b4 = _mm_xor_si128(b4, (TEMP_MUL1[0])); -> helper! */\
|
|
||||||
b4 = _mm_xor_si128(b4, (TEMP_MUL1[2]));\
|
|
||||||
b4 = _mm_xor_si128(b4, (TEMP_MUL2[3]));\
|
|
||||||
b4 = _mm_xor_si128(b4, (TEMP_MUL2[5]));\
|
|
||||||
b3 = _mm_xor_si128(b3, b4);\
|
|
||||||
b6 = _mm_xor_si128(b6, b4);\
|
|
||||||
\
|
|
||||||
/* 6 */\
|
|
||||||
a3 = _mm_xor_si128(a3, (TEMP_MUL1[1]));\
|
|
||||||
a3 = _mm_xor_si128(a3, (TEMP_MUL1[3]));\
|
|
||||||
a3 = _mm_xor_si128(a3, (TEMP_MUL2[4]));\
|
|
||||||
a3 = _mm_xor_si128(a3, (TEMP_MUL2[6]));\
|
|
||||||
b4 = _mm_xor_si128(b4, a3);\
|
|
||||||
b5 = _mm_xor_si128(b5, a3);\
|
|
||||||
b7 = _mm_xor_si128(b7, a3);\
|
|
||||||
\
|
|
||||||
/* 7 */\
|
|
||||||
a1 = _mm_xor_si128(a1, (TEMP_MUL1[1]));\
|
|
||||||
a1 = _mm_xor_si128(a1, (TEMP_MUL2[4]));\
|
|
||||||
b2 = _mm_xor_si128(b2, a1);\
|
|
||||||
b3 = _mm_xor_si128(b3, a1);\
|
|
||||||
\
|
|
||||||
/* 8 */\
|
|
||||||
a5 = _mm_xor_si128(a5, (TEMP_MUL1[5]));\
|
|
||||||
a5 = _mm_xor_si128(a5, (TEMP_MUL2[0]));\
|
|
||||||
b6 = _mm_xor_si128(b6, a5);\
|
|
||||||
b7 = _mm_xor_si128(b7, a5);\
|
|
||||||
\
|
|
||||||
/* 9 */\
|
|
||||||
a3 = TEMP_MUL1[2];\
|
|
||||||
a3 = _mm_xor_si128(a3, (TEMP_MUL2[5]));\
|
|
||||||
b0 = _mm_xor_si128(b0, a3);\
|
|
||||||
b5 = _mm_xor_si128(b5, a3);\
|
|
||||||
\
|
|
||||||
/* 10 */\
|
|
||||||
a1 = TEMP_MUL1[6];\
|
|
||||||
a1 = _mm_xor_si128(a1, (TEMP_MUL2[1]));\
|
|
||||||
b1 = _mm_xor_si128(b1, a1);\
|
|
||||||
b4 = _mm_xor_si128(b4, a1);\
|
|
||||||
\
|
|
||||||
/* 11 */\
|
|
||||||
a5 = TEMP_MUL1[3];\
|
|
||||||
a5 = _mm_xor_si128(a5, (TEMP_MUL2[6]));\
|
|
||||||
b1 = _mm_xor_si128(b1, a5);\
|
|
||||||
b6 = _mm_xor_si128(b6, a5);\
|
|
||||||
\
|
|
||||||
/* 12 */\
|
|
||||||
a3 = TEMP_MUL1[7];\
|
|
||||||
a3 = _mm_xor_si128(a3, (TEMP_MUL2[2]));\
|
|
||||||
b2 = _mm_xor_si128(b2, a3);\
|
|
||||||
b5 = _mm_xor_si128(b5, a3);\
|
|
||||||
\
|
|
||||||
/* 13 */\
|
|
||||||
b0 = _mm_xor_si128(b0, (TEMP_MUL4));\
|
|
||||||
b0 = _mm_xor_si128(b0, a4);\
|
|
||||||
b1 = _mm_xor_si128(b1, a4);\
|
|
||||||
b3 = _mm_xor_si128(b3, a6);\
|
|
||||||
b4 = _mm_xor_si128(b4, a0);\
|
|
||||||
b4 = _mm_xor_si128(b4, a7);\
|
|
||||||
b5 = _mm_xor_si128(b5, a0);\
|
|
||||||
b7 = _mm_xor_si128(b7, a2);\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/*
|
|
||||||
grsiSUBSH_MASK[0] = _mm_set_epi32(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);\
|
|
||||||
grsiSUBSH_MASK[1] = _mm_set_epi32(0x000f0e0d, 0x0c0b0a09, 0x08070605, 0x04030201);\
|
|
||||||
grsiSUBSH_MASK[2] = _mm_set_epi32(0x01000f0e, 0x0d0c0b0a, 0x09080706, 0x05040302);\
|
|
||||||
grsiSUBSH_MASK[3] = _mm_set_epi32(0x0201000f, 0x0e0d0c0b, 0x0a090807, 0x06050403);\
|
|
||||||
grsiSUBSH_MASK[4] = _mm_set_epi32(0x03020100, 0x0f0e0d0c, 0x0b0a0908, 0x07060504);\
|
|
||||||
grsiSUBSH_MASK[5] = _mm_set_epi32(0x04030201, 0x000f0e0d, 0x0c0b0a09, 0x08070605);\
|
|
||||||
grsiSUBSH_MASK[6] = _mm_set_epi32(0x05040302, 0x01000f0e, 0x0d0c0b0a, 0x09080706);\
|
|
||||||
grsiSUBSH_MASK[7] = _mm_set_epi32(0x0a090807, 0x06050403, 0x0201000f, 0x0e0d0c0b);\
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Initialize all constants
 * - runs grsiSET_SHARED_CONSTANTS()
 * - sets grsiALL_FF to four 0xffffffff words
 * - fills grsiROUND_CONST_P[i] and grsiROUND_CONST_Q[i] for
 *   i = 0 .. grsiROUNDS1024-1; each 32-bit word of the base pattern is
 *   XORed with (i * 0x01010101), i.e. with i copied into every byte
 *   (for i < 256)
 * NOTE: the loop variable 'i' is not declared by this macro; an integer
 * variable named 'i' must be in scope where the macro is expanded, and it
 * is overwritten by the loop.
 * */
#define grsiSET_CONSTANTS(){\
|
|
||||||
grsiSET_SHARED_CONSTANTS();\
|
|
||||||
grsiALL_FF = _mm_set_epi32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);\
|
|
||||||
for(i = 0; i < grsiROUNDS1024; i++)\
|
|
||||||
{\
|
|
||||||
grsiROUND_CONST_P[i] = _mm_set_epi32(0xf0e0d0c0 ^ (i * 0x01010101), 0xb0a09080 ^ (i * 0x01010101), 0x70605040 ^ (i * 0x01010101), 0x30201000 ^ (i * 0x01010101));\
|
|
||||||
grsiROUND_CONST_Q[i] = _mm_set_epi32(0x0f1f2f3f ^ (i * 0x01010101), 0x4f5f6f7f ^ (i * 0x01010101), 0x8f9fafbf ^ (i * 0x01010101), 0xcfdfefff ^ (i * 0x01010101));\
|
|
||||||
}\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/* one round
|
|
||||||
* a0-a7 = input rows
|
|
||||||
* b0-b7 = output rows
|
|
||||||
*/
|
|
||||||
#define grsiSUBMIX(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
|
|
||||||
/* SubBytes + Multiplication */\
|
|
||||||
grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, b1, b2, b5, b6, b0, b3, b4, b7);\
|
|
||||||
/* grsiMixBytes */\
|
|
||||||
grsiMixBytes(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7);\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
#define grsiROUNDS_P(){\
|
|
||||||
u32 round_counter;\
|
|
||||||
for(round_counter = 0; round_counter < 14; round_counter+=2) {\
|
|
||||||
/* AddRoundConstant P1024 */\
|
|
||||||
xmm8 = _mm_xor_si128(xmm8, (grsiROUND_CONST_P[round_counter]));\
|
|
||||||
/* ShiftBytes P1024 + pre-AESENCLAST */\
|
|
||||||
xmm8 = _mm_shuffle_epi8(xmm8, (grsiSUBSH_MASK[0]));\
|
|
||||||
xmm9 = _mm_shuffle_epi8(xmm9, (grsiSUBSH_MASK[1]));\
|
|
||||||
xmm10 = _mm_shuffle_epi8(xmm10, (grsiSUBSH_MASK[2]));\
|
|
||||||
xmm11 = _mm_shuffle_epi8(xmm11, (grsiSUBSH_MASK[3]));\
|
|
||||||
xmm12 = _mm_shuffle_epi8(xmm12, (grsiSUBSH_MASK[4]));\
|
|
||||||
xmm13 = _mm_shuffle_epi8(xmm13, (grsiSUBSH_MASK[5]));\
|
|
||||||
xmm14 = _mm_shuffle_epi8(xmm14, (grsiSUBSH_MASK[6]));\
|
|
||||||
xmm15 = _mm_shuffle_epi8(xmm15, (grsiSUBSH_MASK[7]));\
|
|
||||||
/* SubBytes + grsiMixBytes */\
|
|
||||||
grsiSUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
|
|
||||||
grsiVPERM_Add_Constant(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, grsiALL_15, xmm8);\
|
|
||||||
\
|
|
||||||
/* AddRoundConstant P1024 */\
|
|
||||||
xmm0 = _mm_xor_si128(xmm0, (grsiROUND_CONST_P[round_counter+1]));\
|
|
||||||
/* ShiftBytes P1024 + pre-AESENCLAST */\
|
|
||||||
xmm0 = _mm_shuffle_epi8(xmm0, (grsiSUBSH_MASK[0]));\
|
|
||||||
xmm1 = _mm_shuffle_epi8(xmm1, (grsiSUBSH_MASK[1]));\
|
|
||||||
xmm2 = _mm_shuffle_epi8(xmm2, (grsiSUBSH_MASK[2]));\
|
|
||||||
xmm3 = _mm_shuffle_epi8(xmm3, (grsiSUBSH_MASK[3]));\
|
|
||||||
xmm4 = _mm_shuffle_epi8(xmm4, (grsiSUBSH_MASK[4]));\
|
|
||||||
xmm5 = _mm_shuffle_epi8(xmm5, (grsiSUBSH_MASK[5]));\
|
|
||||||
xmm6 = _mm_shuffle_epi8(xmm6, (grsiSUBSH_MASK[6]));\
|
|
||||||
xmm7 = _mm_shuffle_epi8(xmm7, (grsiSUBSH_MASK[7]));\
|
|
||||||
/* SubBytes + grsiMixBytes */\
|
|
||||||
grsiSUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
|
|
||||||
grsiVPERM_Add_Constant(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, grsiALL_15, xmm0);\
|
|
||||||
}\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
#define grsiROUNDS_Q(){\
|
|
||||||
grsiVPERM_Add_Constant(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, grsiALL_15, xmm1);\
|
|
||||||
u32 round_counter = 0;\
|
|
||||||
for(round_counter = 0; round_counter < 14; round_counter+=2) {\
|
|
||||||
/* AddRoundConstant Q1024 */\
|
|
||||||
xmm1 = grsiALL_FF;\
|
|
||||||
xmm8 = _mm_xor_si128(xmm8, xmm1);\
|
|
||||||
xmm9 = _mm_xor_si128(xmm9, xmm1);\
|
|
||||||
xmm10 = _mm_xor_si128(xmm10, xmm1);\
|
|
||||||
xmm11 = _mm_xor_si128(xmm11, xmm1);\
|
|
||||||
xmm12 = _mm_xor_si128(xmm12, xmm1);\
|
|
||||||
xmm13 = _mm_xor_si128(xmm13, xmm1);\
|
|
||||||
xmm14 = _mm_xor_si128(xmm14, xmm1);\
|
|
||||||
xmm15 = _mm_xor_si128(xmm15, (grsiROUND_CONST_Q[round_counter]));\
|
|
||||||
/* ShiftBytes Q1024 + pre-AESENCLAST */\
|
|
||||||
xmm8 = _mm_shuffle_epi8(xmm8, (grsiSUBSH_MASK[1]));\
|
|
||||||
xmm9 = _mm_shuffle_epi8(xmm9, (grsiSUBSH_MASK[3]));\
|
|
||||||
xmm10 = _mm_shuffle_epi8(xmm10, (grsiSUBSH_MASK[5]));\
|
|
||||||
xmm11 = _mm_shuffle_epi8(xmm11, (grsiSUBSH_MASK[7]));\
|
|
||||||
xmm12 = _mm_shuffle_epi8(xmm12, (grsiSUBSH_MASK[0]));\
|
|
||||||
xmm13 = _mm_shuffle_epi8(xmm13, (grsiSUBSH_MASK[2]));\
|
|
||||||
xmm14 = _mm_shuffle_epi8(xmm14, (grsiSUBSH_MASK[4]));\
|
|
||||||
xmm15 = _mm_shuffle_epi8(xmm15, (grsiSUBSH_MASK[6]));\
|
|
||||||
/* SubBytes + grsiMixBytes */\
|
|
||||||
grsiSUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
|
|
||||||
\
|
|
||||||
/* AddRoundConstant Q1024 */\
|
|
||||||
xmm9 = grsiALL_FF;\
|
|
||||||
xmm0 = _mm_xor_si128(xmm0, xmm9);\
|
|
||||||
xmm1 = _mm_xor_si128(xmm1, xmm9);\
|
|
||||||
xmm2 = _mm_xor_si128(xmm2, xmm9);\
|
|
||||||
xmm3 = _mm_xor_si128(xmm3, xmm9);\
|
|
||||||
xmm4 = _mm_xor_si128(xmm4, xmm9);\
|
|
||||||
xmm5 = _mm_xor_si128(xmm5, xmm9);\
|
|
||||||
xmm6 = _mm_xor_si128(xmm6, xmm9);\
|
|
||||||
xmm7 = _mm_xor_si128(xmm7, (grsiROUND_CONST_Q[round_counter+1]));\
|
|
||||||
/* ShiftBytes Q1024 + pre-AESENCLAST */\
|
|
||||||
xmm0 = _mm_shuffle_epi8(xmm0, (grsiSUBSH_MASK[1]));\
|
|
||||||
xmm1 = _mm_shuffle_epi8(xmm1, (grsiSUBSH_MASK[3]));\
|
|
||||||
xmm2 = _mm_shuffle_epi8(xmm2, (grsiSUBSH_MASK[5]));\
|
|
||||||
xmm3 = _mm_shuffle_epi8(xmm3, (grsiSUBSH_MASK[7]));\
|
|
||||||
xmm4 = _mm_shuffle_epi8(xmm4, (grsiSUBSH_MASK[0]));\
|
|
||||||
xmm5 = _mm_shuffle_epi8(xmm5, (grsiSUBSH_MASK[2]));\
|
|
||||||
xmm6 = _mm_shuffle_epi8(xmm6, (grsiSUBSH_MASK[4]));\
|
|
||||||
xmm7 = _mm_shuffle_epi8(xmm7, (grsiSUBSH_MASK[6]));\
|
|
||||||
/* SubBytes + grsiMixBytes*/ \
|
|
||||||
grsiSUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
|
|
||||||
}\
|
|
||||||
grsiVPERM_Add_Constant(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, grsiALL_15, xmm1);\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
|
|
||||||
/* Matrix Transpose
|
|
||||||
* input is a 1024-bit state with two columns in one xmm
|
|
||||||
* output is a 1024-bit state with two rows in one xmm
|
|
||||||
* inputs: i0-i7
|
|
||||||
* outputs: i0-i7
|
|
||||||
* clobbers: t0-t7
|
|
||||||
*/
|
|
||||||
#define grsiMatrix_Transpose(i0, i1, i2, i3, i4, i5, i6, i7, t0, t1, t2, t3, t4, t5, t6, t7){\
|
|
||||||
t0 = grsiTRANSP_MASK;\
|
|
||||||
\
|
|
||||||
i6 = _mm_shuffle_epi8(i6, t0);\
|
|
||||||
i0 = _mm_shuffle_epi8(i0, t0);\
|
|
||||||
i1 = _mm_shuffle_epi8(i1, t0);\
|
|
||||||
i2 = _mm_shuffle_epi8(i2, t0);\
|
|
||||||
i3 = _mm_shuffle_epi8(i3, t0);\
|
|
||||||
t1 = i2;\
|
|
||||||
i4 = _mm_shuffle_epi8(i4, t0);\
|
|
||||||
i5 = _mm_shuffle_epi8(i5, t0);\
|
|
||||||
t2 = i4;\
|
|
||||||
t3 = i6;\
|
|
||||||
i7 = _mm_shuffle_epi8(i7, t0);\
|
|
||||||
\
|
|
||||||
/* continue with unpack using 4 temp registers */\
|
|
||||||
t0 = i0;\
|
|
||||||
t2 = _mm_unpackhi_epi16(t2, i5);\
|
|
||||||
i4 = _mm_unpacklo_epi16(i4, i5);\
|
|
||||||
t3 = _mm_unpackhi_epi16(t3, i7);\
|
|
||||||
i6 = _mm_unpacklo_epi16(i6, i7);\
|
|
||||||
t0 = _mm_unpackhi_epi16(t0, i1);\
|
|
||||||
t1 = _mm_unpackhi_epi16(t1, i3);\
|
|
||||||
i2 = _mm_unpacklo_epi16(i2, i3);\
|
|
||||||
i0 = _mm_unpacklo_epi16(i0, i1);\
|
|
||||||
\
|
|
||||||
/* shuffle with immediate */\
|
|
||||||
t0 = _mm_shuffle_epi32(t0, 216);\
|
|
||||||
t1 = _mm_shuffle_epi32(t1, 216);\
|
|
||||||
t2 = _mm_shuffle_epi32(t2, 216);\
|
|
||||||
t3 = _mm_shuffle_epi32(t3, 216);\
|
|
||||||
i0 = _mm_shuffle_epi32(i0, 216);\
|
|
||||||
i2 = _mm_shuffle_epi32(i2, 216);\
|
|
||||||
i4 = _mm_shuffle_epi32(i4, 216);\
|
|
||||||
i6 = _mm_shuffle_epi32(i6, 216);\
|
|
||||||
\
|
|
||||||
/* continue with unpack */\
|
|
||||||
t4 = i0;\
|
|
||||||
i0 = _mm_unpacklo_epi32(i0, i2);\
|
|
||||||
t4 = _mm_unpackhi_epi32(t4, i2);\
|
|
||||||
t5 = t0;\
|
|
||||||
t0 = _mm_unpacklo_epi32(t0, t1);\
|
|
||||||
t5 = _mm_unpackhi_epi32(t5, t1);\
|
|
||||||
t6 = i4;\
|
|
||||||
i4 = _mm_unpacklo_epi32(i4, i6);\
|
|
||||||
t7 = t2;\
|
|
||||||
t6 = _mm_unpackhi_epi32(t6, i6);\
|
|
||||||
i2 = t0;\
|
|
||||||
t2 = _mm_unpacklo_epi32(t2, t3);\
|
|
||||||
i3 = t0;\
|
|
||||||
t7 = _mm_unpackhi_epi32(t7, t3);\
|
|
||||||
\
|
|
||||||
/* there are now 2 rows in each xmm */\
|
|
||||||
/* unpack to get 1 row of CV in each xmm */\
|
|
||||||
i1 = i0;\
|
|
||||||
i1 = _mm_unpackhi_epi64(i1, i4);\
|
|
||||||
i0 = _mm_unpacklo_epi64(i0, i4);\
|
|
||||||
i4 = t4;\
|
|
||||||
i3 = _mm_unpackhi_epi64(i3, t2);\
|
|
||||||
i5 = t4;\
|
|
||||||
i2 = _mm_unpacklo_epi64(i2, t2);\
|
|
||||||
i6 = t5;\
|
|
||||||
i5 = _mm_unpackhi_epi64(i5, t6);\
|
|
||||||
i7 = t5;\
|
|
||||||
i4 = _mm_unpacklo_epi64(i4, t6);\
|
|
||||||
i7 = _mm_unpackhi_epi64(i7, t7);\
|
|
||||||
i6 = _mm_unpacklo_epi64(i6, t7);\
|
|
||||||
/* transpose done */\
|
|
||||||
}/**/
|
|
||||||
|
|
||||||
/* Matrix Transpose Inverse
 * Takes a 1024-bit state stored with two rows per xmm variable and
 * rewrites it with two columns per xmm variable.
 * inputs:   i0-i7
 * outputs:  (i0, o0, i1, i3, o1, o2, i5, i7)
 * clobbers: t0-t4; i2, i4 and i6 are overwritten with intermediate values
 * Each argument is expanded several times, so pass plain, distinct
 * __m128i variables.
 */
#define grsiMatrix_Transpose_INV(i0, i1, i2, i3, i4, i5, i6, i7, o0, o1, o2, t0, t1, t2, t3, t4){\
  /* split every input pair into its low / high 64-bit halves */\
  o1 = _mm_unpackhi_epi64(i0, i1);\
  i0 = _mm_unpacklo_epi64(i0, i1);\
  t0 = _mm_unpackhi_epi64(i2, i3);\
  i2 = _mm_unpacklo_epi64(i2, i3);\
  t1 = _mm_unpackhi_epi64(i4, i5);\
  i4 = _mm_unpacklo_epi64(i4, i5);\
  t2 = _mm_unpackhi_epi64(i6, i7);\
  i6 = _mm_unpacklo_epi64(i6, i7);\
  /* o0 temporarily carries the byte-shuffle mask, which is applied 8 times */\
  o0 = grsiTRANSP_MASK;\
  i0 = _mm_shuffle_epi8(i0, o0);\
  i2 = _mm_shuffle_epi8(i2, o0);\
  i4 = _mm_shuffle_epi8(i4, o0);\
  i6 = _mm_shuffle_epi8(i6, o0);\
  o1 = _mm_shuffle_epi8(o1, o0);\
  t0 = _mm_shuffle_epi8(t0, o0);\
  t1 = _mm_shuffle_epi8(t1, o0);\
  t2 = _mm_shuffle_epi8(t2, o0);\
  /* interleave 16-bit lanes; the high halves go to t3, o0, o2, t4 */\
  t3 = _mm_unpackhi_epi16(i4, i6);\
  i4 = _mm_unpacklo_epi16(i4, i6);\
  o0 = _mm_unpackhi_epi16(i0, i2);\
  i0 = _mm_unpacklo_epi16(i0, i2);\
  o2 = _mm_unpackhi_epi16(o1, t0);\
  o1 = _mm_unpacklo_epi16(o1, t0);\
  t4 = _mm_unpackhi_epi16(t1, t2);\
  t1 = _mm_unpacklo_epi16(t1, t2);\
  /* reorder the 32-bit lanes of each register (immediate 216) */\
  i4 = _mm_shuffle_epi32(i4, 216);\
  t3 = _mm_shuffle_epi32(t3, 216);\
  o1 = _mm_shuffle_epi32(o1, 216);\
  o2 = _mm_shuffle_epi32(o2, 216);\
  i0 = _mm_shuffle_epi32(i0, 216);\
  o0 = _mm_shuffle_epi32(o0, 216);\
  t1 = _mm_shuffle_epi32(t1, 216);\
  t4 = _mm_shuffle_epi32(t4, 216);\
  /* final 32-bit interleave produces the eight output registers */\
  i1 = _mm_unpackhi_epi32(i0, i4);\
  i0 = _mm_unpacklo_epi32(i0, i4);\
  i3 = _mm_unpackhi_epi32(o0, t3);\
  o0 = _mm_unpacklo_epi32(o0, t3);\
  i5 = _mm_unpackhi_epi32(o1, t1);\
  o1 = _mm_unpacklo_epi32(o1, t1);\
  i7 = _mm_unpackhi_epi32(o2, t4);\
  o2 = _mm_unpacklo_epi32(o2, t4);\
  /* transpose done */\
}/**/
|
|
||||||
|
|
||||||
/* Convert the round-constant pair (i, j) of both P and Q into grsiVPERM
 * mode, in place.  xmm0-xmm3 carry the four constants, xmm4-xmm10 are
 * handed to grsiVPERM_Transform_State as scratch, and the two Q constants
 * are additionally xored with grsiALL_15 before being written back.
 * Expects xmm0-xmm10 to be declared at the point of expansion. */
#define grsiVPERM_Transform_RoundConst_CNT2(i, j){\
  xmm0 = grsiROUND_CONST_P[i];\
  xmm1 = grsiROUND_CONST_P[j];\
  xmm2 = grsiROUND_CONST_Q[i];\
  xmm3 = grsiROUND_CONST_Q[j];\
  grsiVPERM_Transform_State(xmm0, xmm1, xmm2, xmm3, grsiVPERM_IPT, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10);\
  /* P constants are stored as transformed */\
  grsiROUND_CONST_P[i] = xmm0;\
  grsiROUND_CONST_P[j] = xmm1;\
  /* Q constants get the extra grsiALL_15 xor first */\
  xmm2 = _mm_xor_si128(xmm2, (grsiALL_15));\
  grsiROUND_CONST_Q[i] = xmm2;\
  xmm3 = _mm_xor_si128(xmm3, (grsiALL_15));\
  grsiROUND_CONST_Q[j] = xmm3;\
}/**/
|
|
||||||
|
|
||||||
/* Rewrite all round constants for grsiVPERM mode: the P/Q constants with
 * indices 0..13 are converted pairwise, then grsiALL_FF is transformed and
 * xored with grsiALL_15.  The globals are modified in place.
 * Expects xmm0-xmm10 to be declared at the point of expansion. */
#define grsiVPERM_Transform_RoundConst(){\
  grsiVPERM_Transform_RoundConst_CNT2(0, 1);\
  grsiVPERM_Transform_RoundConst_CNT2(2, 3);\
  grsiVPERM_Transform_RoundConst_CNT2(4, 5);\
  grsiVPERM_Transform_RoundConst_CNT2(6, 7);\
  grsiVPERM_Transform_RoundConst_CNT2(8, 9);\
  grsiVPERM_Transform_RoundConst_CNT2(10, 11);\
  grsiVPERM_Transform_RoundConst_CNT2(12, 13);\
  /* same treatment for the all-ones constant */\
  xmm0 = grsiALL_FF;\
  grsiVPERM_Transform(xmm0, xmm1, grsiVPERM_IPT, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10);\
  grsiALL_FF = xmm0 = _mm_xor_si128(xmm0, (grsiALL_15));\
}/**/
|
|
||||||
|
|
||||||
|
|
||||||
/* Prepare the round constants and the initial chaining value.
 *
 * h is accessed as eight __m128i values (128 bytes) and is rewritten in
 * place: the IV is run through grsiVPERM_Transform_State and then
 * grsiMatrix_Transpose.  The round constants are converted first via
 * grsiVPERM_Transform_RoundConst().
 * All scratch variables have static storage, so calls share them. */
IFUN void grsiINIT(u64* h)
#if !defined(DECLARE_IFUN)
;
#else
{
  __m128i* const chaining = (__m128i*) h;
  static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
  static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;

  /* round constants -> grsiVPERM mode */
  grsiVPERM_Transform_RoundConst();

  /* IV -> xmm8..xmm15 */
  xmm8  = chaining[0];   xmm9  = chaining[1];
  xmm10 = chaining[2];   xmm11 = chaining[3];
  xmm12 = chaining[4];   xmm13 = chaining[5];
  xmm14 = chaining[6];   xmm15 = chaining[7];

  /* column ordering -> row ordering */
  grsiVPERM_Transform_State(xmm8, xmm9, xmm10, xmm11, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
  grsiVPERM_Transform_State(xmm12, xmm13, xmm14, xmm15, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
  grsiMatrix_Transpose(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);

  /* write the transposed IV back over h */
  chaining[0] = xmm8;    chaining[1] = xmm9;
  chaining[2] = xmm10;   chaining[3] = xmm11;
  chaining[4] = xmm12;   chaining[5] = xmm13;
  chaining[6] = xmm14;   chaining[7] = xmm15;
}
#endif
|
|
||||||
|
|
||||||
/* Compression function for the 1024-bit state: h <- P(h+m) + Q(m) + h.
 *
 * h: chaining value, read and rewritten in place as eight __m128i values.
 * m: one message block, read as eight __m128i values.
 * The message is converted with grsiVPERM_Transform_State and
 * grsiMatrix_Transpose before use; h is used as stored.
 * TEMP_MUL1/TEMP_MUL2/TEMP_MUL4 are not referenced directly here --
 * presumably the grsiROUNDS_P/grsiROUNDS_Q macros use them (confirm there).
 * All working variables have static storage, so calls share them. */
IFUN void grsiTF1024(u64* h, u64* m)
#if !defined(DECLARE_IFUN)
;
#else
{
  __m128i* const chaining = (__m128i*) h;
  __m128i* const message = (__m128i*) m;
  static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
  static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
  static __m128i TEMP_MUL1[8];
  static __m128i TEMP_MUL2[8];
  static __m128i TEMP_MUL4;
  static __m128i QTEMP[8];

  /* message block -> xmm8..xmm15 (this is the Q input) */
  xmm8  = message[0];   xmm9  = message[1];
  xmm10 = message[2];   xmm11 = message[3];
  xmm12 = message[4];   xmm13 = message[5];
  xmm14 = message[6];   xmm15 = message[7];

  /* message: column ordering -> row ordering */
  grsiVPERM_Transform_State(xmm8, xmm9, xmm10, xmm11, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
  grsiVPERM_Transform_State(xmm12, xmm13, xmm14, xmm15, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
  grsiMatrix_Transpose(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);

  /* park M in QTEMP for the Q permutation, and form the P input CV+M */
  QTEMP[0] = xmm8;    xmm8  = _mm_xor_si128(xmm8,  chaining[0]);
  QTEMP[1] = xmm9;    xmm9  = _mm_xor_si128(xmm9,  chaining[1]);
  QTEMP[2] = xmm10;   xmm10 = _mm_xor_si128(xmm10, chaining[2]);
  QTEMP[3] = xmm11;   xmm11 = _mm_xor_si128(xmm11, chaining[3]);
  QTEMP[4] = xmm12;   xmm12 = _mm_xor_si128(xmm12, chaining[4]);
  QTEMP[5] = xmm13;   xmm13 = _mm_xor_si128(xmm13, chaining[5]);
  QTEMP[6] = xmm14;   xmm14 = _mm_xor_si128(xmm14, chaining[6]);
  QTEMP[7] = xmm15;   xmm15 = _mm_xor_si128(xmm15, chaining[7]);

  /* xmm8..xmm15 <- P(CV+M) */
  grsiROUNDS_P();

  /* feed-forward: h <- P(CV+M)+CV */
  xmm8  = _mm_xor_si128(xmm8,  chaining[0]);   chaining[0] = xmm8;
  xmm9  = _mm_xor_si128(xmm9,  chaining[1]);   chaining[1] = xmm9;
  xmm10 = _mm_xor_si128(xmm10, chaining[2]);   chaining[2] = xmm10;
  xmm11 = _mm_xor_si128(xmm11, chaining[3]);   chaining[3] = xmm11;
  xmm12 = _mm_xor_si128(xmm12, chaining[4]);   chaining[4] = xmm12;
  xmm13 = _mm_xor_si128(xmm13, chaining[5]);   chaining[5] = xmm13;
  xmm14 = _mm_xor_si128(xmm14, chaining[6]);   chaining[6] = xmm14;
  xmm15 = _mm_xor_si128(xmm15, chaining[7]);   chaining[7] = xmm15;

  /* bring M back into xmm8..xmm15 */
  xmm8  = QTEMP[0];   xmm9  = QTEMP[1];
  xmm10 = QTEMP[2];   xmm11 = QTEMP[3];
  xmm12 = QTEMP[4];   xmm13 = QTEMP[5];
  xmm14 = QTEMP[6];   xmm15 = QTEMP[7];

  /* xmm8..xmm15 <- Q(M) */
  grsiROUNDS_Q();

  /* h <- P(CV+M)+CV+Q(M): the new chaining value */
  xmm8  = _mm_xor_si128(xmm8,  chaining[0]);   chaining[0] = xmm8;
  xmm9  = _mm_xor_si128(xmm9,  chaining[1]);   chaining[1] = xmm9;
  xmm10 = _mm_xor_si128(xmm10, chaining[2]);   chaining[2] = xmm10;
  xmm11 = _mm_xor_si128(xmm11, chaining[3]);   chaining[3] = xmm11;
  xmm12 = _mm_xor_si128(xmm12, chaining[4]);   chaining[4] = xmm12;
  xmm13 = _mm_xor_si128(xmm13, chaining[5]);   chaining[5] = xmm13;
  xmm14 = _mm_xor_si128(xmm14, chaining[6]);   chaining[6] = xmm14;
  xmm15 = _mm_xor_si128(xmm15, chaining[7]);   chaining[7] = xmm15;
}
#endif
|
|
||||||
|
|
||||||
/* Output transformation for the 1024-bit state: computes P(h)+h, converts
 * it back with grsiMatrix_Transpose_INV and grsiVPERM_Transform_State
 * (grsiVPERM_OPT), and stores the truncated half in h[4..7] (as __m128i).
 * h[0..3] are left as they were.
 * TEMP_MUL1/TEMP_MUL2/TEMP_MUL4 are not referenced directly here --
 * presumably the grsiROUNDS_P macro uses them (confirm there).
 * All working variables have static storage, so calls share them. */
IFUN void grsiOF1024(u64* h)
#if !defined(DECLARE_IFUN)
;
#else
{
  __m128i* const chaining = (__m128i*) h;
  static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
  static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
  static __m128i TEMP_MUL1[8];
  static __m128i TEMP_MUL2[8];
  static __m128i TEMP_MUL4;

  /* CV -> xmm8..xmm15 */
  xmm8  = chaining[0];   xmm9  = chaining[1];
  xmm10 = chaining[2];   xmm11 = chaining[3];
  xmm12 = chaining[4];   xmm13 = chaining[5];
  xmm14 = chaining[6];   xmm15 = chaining[7];

  /* xmm8..xmm15 <- P(CV) */
  grsiROUNDS_P();

  /* feed-forward: xmm8..xmm15 <- P(CV)+CV */
  xmm8  = _mm_xor_si128(xmm8,  chaining[0]);
  xmm9  = _mm_xor_si128(xmm9,  chaining[1]);
  xmm10 = _mm_xor_si128(xmm10, chaining[2]);
  xmm11 = _mm_xor_si128(xmm11, chaining[3]);
  xmm12 = _mm_xor_si128(xmm12, chaining[4]);
  xmm13 = _mm_xor_si128(xmm13, chaining[5]);
  xmm14 = _mm_xor_si128(xmm14, chaining[6]);
  xmm15 = _mm_xor_si128(xmm15, chaining[7]);

  /* row ordering -> column ordering; the part of the result that is kept
     ends up in xmm0, xmm6, xmm13, xmm15 */
  grsiMatrix_Transpose_INV(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm4, xmm0, xmm6, xmm1, xmm2, xmm3, xmm5, xmm7);
  grsiVPERM_Transform_State(xmm0, xmm6, xmm13, xmm15, grsiVPERM_OPT, xmm1, xmm2, xmm3, xmm5, xmm7, xmm10, xmm12);

  /* only the truncated half of the state is returned */
  chaining[4] = xmm0;
  chaining[5] = xmm6;
  chaining[6] = xmm13;
  chaining[7] = xmm15;
}
#endif
|
|
||||||
|
|
@@ -1,273 +0,0 @@
|
|||||||
/* hash.c Aug 2011
|
|
||||||
*
|
|
||||||
* Groestl implementation for different versions.
|
|
||||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
|
||||||
*
|
|
||||||
* This code is placed in the public domain
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "grsi.h"
|
|
||||||
#include "grsi-asm.h"
|
|
||||||
|
|
||||||
/* Macro form of grsiInit() that initialises the context named `sts_grs`,
 * which must be a grsiState visible at the point of expansion.
 *
 * The state arrays live inside grsiState, so their addresses can never be
 * NULL; the former NULL test (which also ran only after both arrays had
 * been cleared, and hid a `return` inside the macro) has been dropped,
 * together with an unused local. */
#define GRS_I \
do { \
  grsiState *ctx = &sts_grs; \
\
  /* set number of state columns and state size depending on \
     variant */ \
  ctx->grsicolumns = grsiCOLS; \
  ctx->grsistatesize = grsiSIZE; \
  ctx->grsiv = LONG; \
\
  grsiSET_CONSTANTS(); \
\
  memset(ctx->grsichaining, 0, sizeof ctx->grsichaining); \
  memset(ctx->grsibuffer, 0, sizeof ctx->grsibuffer); \
\
  /* set initial value: output length in the last state column */ \
  ctx->grsichaining[ctx->grsicolumns-1] = grsiU64BIG((u64)grsiLENGTH); \
\
  grsiINIT(ctx->grsichaining); \
\
  /* set other variables */ \
  ctx->grsibuf_ptr = 0; \
  ctx->grsiblock_counter = 0; \
  ctx->grsibits_in_last_byte = 0; \
\
} while (0)
|
|
||||||
|
|
||||||
/* digest up to len bytes of input (full blocks only) */
|
|
||||||
void grsiTransform(grsiState *ctx,
|
|
||||||
const u8 *in,
|
|
||||||
unsigned long long len) {
|
|
||||||
|
|
||||||
/* increment block counter */
|
|
||||||
ctx->grsiblock_counter += len/grsiSIZE;
|
|
||||||
|
|
||||||
/* digest message, one block at a time */
|
|
||||||
for (; len >= grsiSIZE; len -= grsiSIZE, in += grsiSIZE)
|
|
||||||
grsiTF1024((u64*)ctx->grsichaining, (u64*)in);
|
|
||||||
|
|
||||||
asm volatile ("emms");
|
|
||||||
}
|
|
||||||
|
|
||||||
/* given state h, do h <- P(h)+h */
|
|
||||||
void grsiOutputTransformation(grsiState *ctx) {
|
|
||||||
|
|
||||||
/* determine variant */
|
|
||||||
grsiOF1024((u64*)ctx->grsichaining);
|
|
||||||
|
|
||||||
asm volatile ("emms");
|
|
||||||
}
|
|
||||||
|
|
||||||
/* initialise context */
|
|
||||||
void grsiInit(grsiState* ctx) {
|
|
||||||
u8 i = 0;
|
|
||||||
|
|
||||||
/* output size (in bits) must be a positive integer less than or
|
|
||||||
equal to 512, and divisible by 8 */
|
|
||||||
if (grsiLENGTH <= 0 || (grsiLENGTH%8) || grsiLENGTH > 512)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* set number of state columns and state size depending on
|
|
||||||
variant */
|
|
||||||
ctx->grsicolumns = grsiCOLS;
|
|
||||||
ctx->grsistatesize = grsiSIZE;
|
|
||||||
ctx->grsiv = LONG;
|
|
||||||
|
|
||||||
grsiSET_CONSTANTS();
|
|
||||||
|
|
||||||
for (i=0; i<grsiSIZE/8; i++)
|
|
||||||
ctx->grsichaining[i] = 0;
|
|
||||||
for (i=0; i<grsiSIZE; i++)
|
|
||||||
ctx->grsibuffer[i] = 0;
|
|
||||||
|
|
||||||
if (ctx->grsichaining == NULL || ctx->grsibuffer == NULL)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* set initial value */
|
|
||||||
ctx->grsichaining[ctx->grsicolumns-1] = grsiU64BIG((u64)grsiLENGTH);
|
|
||||||
|
|
||||||
grsiINIT(ctx->grsichaining);
|
|
||||||
|
|
||||||
/* set other variables */
|
|
||||||
ctx->grsibuf_ptr = 0;
|
|
||||||
ctx->grsiblock_counter = 0;
|
|
||||||
ctx->grsibits_in_last_byte = 0;
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* update state with databitlen bits of input */
|
|
||||||
void grsiUpdate(grsiState* ctx,
|
|
||||||
const grsiBitSequence* input,
|
|
||||||
grsiDataLength databitlen) {
|
|
||||||
int index = 0;
|
|
||||||
int msglen = (int)(databitlen/8);
|
|
||||||
int rem = (int)(databitlen%8);
|
|
||||||
|
|
||||||
/* non-integral number of message bytes can only be supplied in the
|
|
||||||
last call to this function */
|
|
||||||
if (ctx->grsibits_in_last_byte) return;
|
|
||||||
|
|
||||||
/* if the buffer contains data that has not yet been digested, first
|
|
||||||
add data to buffer until full */
|
|
||||||
if (ctx->grsibuf_ptr) {
|
|
||||||
while (ctx->grsibuf_ptr < ctx->grsistatesize && index < msglen) {
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
|
|
||||||
}
|
|
||||||
if (ctx->grsibuf_ptr < ctx->grsistatesize) {
|
|
||||||
/* buffer still not full, return */
|
|
||||||
if (rem) {
|
|
||||||
ctx->grsibits_in_last_byte = rem;
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest buffer */
|
|
||||||
ctx->grsibuf_ptr = 0;
|
|
||||||
printf("error\n");
|
|
||||||
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest bulk of message */
|
|
||||||
grsiTransform(ctx, input+index, msglen-index);
|
|
||||||
index += ((msglen-index)/ctx->grsistatesize)*ctx->grsistatesize;
|
|
||||||
|
|
||||||
/* store remaining data in buffer */
|
|
||||||
while (index < msglen) {
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* if non-integral number of bytes have been supplied, store
|
|
||||||
remaining bits in last byte, together with information about
|
|
||||||
number of bits */
|
|
||||||
if (rem) {
|
|
||||||
ctx->grsibits_in_last_byte = rem;
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* update state with databitlen bits of input */
|
|
||||||
void grsiUpdateq(grsiState* ctx, const grsiBitSequence* input)
|
|
||||||
{
|
|
||||||
grsiDataLength databitlen= 64*8;
|
|
||||||
int index = 0;
|
|
||||||
int msglen = (int)(databitlen/8);
|
|
||||||
int rem = (int)(databitlen%8);
|
|
||||||
|
|
||||||
/* non-integral number of message bytes can only be supplied in the
|
|
||||||
last call to this function */
|
|
||||||
if (ctx->grsibits_in_last_byte) return;
|
|
||||||
|
|
||||||
/* if the buffer contains data that has not yet been digested, first
|
|
||||||
add data to buffer until full */
|
|
||||||
if (ctx->grsibuf_ptr) {
|
|
||||||
while (ctx->grsibuf_ptr < ctx->grsistatesize && index < msglen) {
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
|
|
||||||
}
|
|
||||||
if (ctx->grsibuf_ptr < ctx->grsistatesize) {
|
|
||||||
/* buffer still not full, return */
|
|
||||||
if (rem) {
|
|
||||||
ctx->grsibits_in_last_byte = rem;
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest buffer */
|
|
||||||
ctx->grsibuf_ptr = 0;
|
|
||||||
printf("error\n");
|
|
||||||
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest bulk of message */
|
|
||||||
grsiTransform(ctx, input+index, msglen-index);
|
|
||||||
index += ((msglen-index)/ctx->grsistatesize)*ctx->grsistatesize;
|
|
||||||
|
|
||||||
/* store remaining data in buffer */
|
|
||||||
while (index < msglen) {
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* if non-integral number of bytes have been supplied, store
|
|
||||||
remaining bits in last byte, together with information about
|
|
||||||
number of bits */
|
|
||||||
if (rem) {
|
|
||||||
ctx->grsibits_in_last_byte = rem;
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define BILB ctx->grsibits_in_last_byte
|
|
||||||
|
|
||||||
/* finalise: process remaining data (including padding), perform
|
|
||||||
output transformation, and write hash result to 'output' */
|
|
||||||
void grsiFinal(grsiState* ctx,
|
|
||||||
grsiBitSequence* output) {
|
|
||||||
int i, j = 0, grsibytelen = grsiLENGTH/8;
|
|
||||||
u8 *s = (grsiBitSequence*)ctx->grsichaining;
|
|
||||||
|
|
||||||
/* pad with '1'-bit and first few '0'-bits */
|
|
||||||
if (BILB) {
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr-1] ^= 0x1<<(7-BILB);
|
|
||||||
BILB = 0;
|
|
||||||
}
|
|
||||||
else ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0x80;
|
|
||||||
|
|
||||||
/* pad with '0'-bits */
|
|
||||||
if (ctx->grsibuf_ptr > ctx->grsistatesize-grsiLENGTHFIELDLEN) {
|
|
||||||
/* padding requires two blocks */
|
|
||||||
while (ctx->grsibuf_ptr < ctx->grsistatesize) {
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0;
|
|
||||||
}
|
|
||||||
/* digest first padding block */
|
|
||||||
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
|
|
||||||
ctx->grsibuf_ptr = 0;
|
|
||||||
}
|
|
||||||
while (ctx->grsibuf_ptr < ctx->grsistatesize-grsiLENGTHFIELDLEN) {
|
|
||||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* length padding */
|
|
||||||
ctx->grsiblock_counter++;
|
|
||||||
ctx->grsibuf_ptr = ctx->grsistatesize;
|
|
||||||
while (ctx->grsibuf_ptr > ctx->grsistatesize-grsiLENGTHFIELDLEN) {
|
|
||||||
ctx->grsibuffer[(int)--ctx->grsibuf_ptr] = (u8)ctx->grsiblock_counter;
|
|
||||||
ctx->grsiblock_counter >>= 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest final padding block */
|
|
||||||
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
|
|
||||||
/* perform output transformation */
|
|
||||||
grsiOutputTransformation(ctx);
|
|
||||||
|
|
||||||
/* store hash result in output */
|
|
||||||
for (i = ctx->grsistatesize-grsibytelen; i < ctx->grsistatesize; i++,j++) {
|
|
||||||
output[j] = s[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* zeroise relevant variables and deallocate memory */
|
|
||||||
|
|
||||||
for (i = 0; i < ctx->grsicolumns; i++) {
|
|
||||||
ctx->grsichaining[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ctx->grsistatesize; i++) {
|
|
||||||
ctx->grsibuffer[i] = 0;
|
|
||||||
}
|
|
||||||
// free(ctx->grsichaining);
|
|
||||||
// free(ctx->grsibuffer);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
@@ -1,79 +0,0 @@
|
|||||||
/* hash.h Aug 2011
|
|
||||||
*
|
|
||||||
* Groestl implementation for different versions.
|
|
||||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
|
||||||
*
|
|
||||||
* This code is placed in the public domain
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __grsi_h
|
|
||||||
#define __grsi_h
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#include "brg_endian.h"
|
|
||||||
#define NEED_UINT_64T
|
|
||||||
#include "brg_types.h"
|
|
||||||
|
|
||||||
/* digest length in bits; selects the state variant below */
#define grsiLENGTH 512

/* geometry of the two state variants (sizes in bytes) */
#define grsiROWS 8
#define grsiLENGTHFIELDLEN grsiROWS
#define grsiCOLS512 8
#define grsiCOLS1024 16
#define grsiSIZE512 (grsiROWS*grsiCOLS512)
#define grsiSIZE1024 (grsiROWS*grsiCOLS1024)
#define grsiROUNDS512 10
#define grsiROUNDS1024 14

/* digests longer than 256 bits use the wide state, all others the narrow one */
#if grsiLENGTH>256
#define grsiCOLS grsiCOLS1024
#define grsiSIZE grsiSIZE1024
#define grsiROUNDS grsiROUNDS1024
#else
#define grsiCOLS grsiCOLS512
#define grsiSIZE grsiSIZE512
#define grsiROUNDS grsiROUNDS512
#endif
|
|
||||||
|
|
||||||
/* rotate the 64-bit value a left by n bits (intended for 0 < n < 64) */
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))

/* grsiEXT_BYTE(var,n): byte n of the 64-bit value var, counted in memory
 * order for the host.
 * grsiU64BIG(a): a converted to big-endian byte order. */
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define grsiEXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
#define grsiU64BIG(a) (a)
#endif /* IS_BIG_ENDIAN */

#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
/* n is parenthesised so that an argument such as i+1 scales correctly */
#define grsiEXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(n))))
#define grsiU64BIG(a) \
  ((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
   (ROTL64(a,24) & li_64(0000FF000000FF00)) | \
   (ROTL64(a,40) & li_64(00FF000000FF0000)) | \
   (ROTL64(a,56) & li_64(FF000000FF000000)))
#endif /* IS_LITTLE_ENDIAN */
|
|
||||||
|
|
||||||
typedef enum { LONG, SHORT } grsiVar;
|
|
||||||
|
|
||||||
/* NIST API begin */
|
|
||||||
typedef unsigned char grsiBitSequence;
|
|
||||||
typedef unsigned long long grsiDataLength;
|
|
||||||
typedef struct {
|
|
||||||
__attribute__ ((aligned (32))) u64 grsichaining[grsiSIZE/8]; /* actual state */
|
|
||||||
__attribute__ ((aligned (32))) grsiBitSequence grsibuffer[grsiSIZE]; /* data buffer */
|
|
||||||
u64 grsiblock_counter; /* message block counter */
|
|
||||||
int grsibuf_ptr; /* data buffer pointer */
|
|
||||||
int grsibits_in_last_byte; /* no. of message bits in last byte of
|
|
||||||
data buffer */
|
|
||||||
int grsicolumns; /* no. of columns in state */
|
|
||||||
int grsistatesize; /* total no. of bytes in state */
|
|
||||||
grsiVar grsiv; /* LONG or SHORT */
|
|
||||||
} grsiState;
|
|
||||||
|
|
||||||
void grsiInit(grsiState*);
|
|
||||||
void grsiUpdate(grsiState*, const grsiBitSequence*, grsiDataLength);
|
|
||||||
void grsiFinal(grsiState*, grsiBitSequence*);
|
|
||||||
/* NIST API end */
|
|
||||||
|
|
||||||
#endif /* __grsi_h */
|
|
File diff suppressed because it is too large
Load Diff
@@ -1,247 +0,0 @@
|
|||||||
/* hash.c Aug 2011
|
|
||||||
*
|
|
||||||
* Groestl implementation for different versions.
|
|
||||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
|
||||||
*
|
|
||||||
* This code is placed in the public domain
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "grsn-asm.h"
|
|
||||||
|
|
||||||
/* digest up to len bytes of input (full blocks only) */
|
|
||||||
void grsnTransform(grsnState *ctx,
|
|
||||||
const u8 *in,
|
|
||||||
unsigned long long len) {
|
|
||||||
|
|
||||||
/* increment block counter */
|
|
||||||
ctx->block_counter += len/grsnSIZE;
|
|
||||||
|
|
||||||
/* digest message, one block at a time */
|
|
||||||
for (; len >= grsnSIZE; len -= grsnSIZE, in += grsnSIZE)
|
|
||||||
#if grsnLENGTH<=256
|
|
||||||
TF512((u64*)ctx->chaining, (u64*)in);
|
|
||||||
#else
|
|
||||||
TF1024((u64*)ctx->chaining, (u64*)in);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
asm volatile ("emms");
|
|
||||||
}
|
|
||||||
|
|
||||||
/* given state h, do h <- P(h)+h */
|
|
||||||
void grsnOutputTransformation(grsnState *ctx) {
|
|
||||||
|
|
||||||
/* determine variant */
|
|
||||||
#if (grsnLENGTH <= 256)
|
|
||||||
OF512((u64*)ctx->chaining);
|
|
||||||
#else
|
|
||||||
OF1024((u64*)ctx->chaining);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
asm volatile ("emms");
|
|
||||||
}
|
|
||||||
|
|
||||||
/* initialise context */
|
|
||||||
void grsnInit(grsnState* ctx) {
|
|
||||||
u8 i = 0;
|
|
||||||
|
|
||||||
/* output size (in bits) must be a positive integer less than or
|
|
||||||
equal to 512, and divisible by 8 */
|
|
||||||
if (grsnLENGTH <= 0 || (grsnLENGTH%8) || grsnLENGTH > 512)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* set number of state columns and state size depending on
|
|
||||||
variant */
|
|
||||||
ctx->columns = grsnCOLS;
|
|
||||||
ctx->statesize = grsnSIZE;
|
|
||||||
#if (grsnLENGTH <= 256)
|
|
||||||
ctx->v = SHORT;
|
|
||||||
#else
|
|
||||||
ctx->v = LONG;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
SET_CONSTANTS();
|
|
||||||
|
|
||||||
for (i=0; i<grsnSIZE/8; i++)
|
|
||||||
ctx->chaining[i] = 0;
|
|
||||||
for (i=0; i<grsnSIZE; i++)
|
|
||||||
ctx->buffer[i] = 0;
|
|
||||||
|
|
||||||
if (ctx->chaining == NULL || ctx->buffer == NULL)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* set initial value */
|
|
||||||
ctx->chaining[ctx->columns-1] = U64BIG((u64)grsnLENGTH);
|
|
||||||
|
|
||||||
INIT(ctx->chaining);
|
|
||||||
|
|
||||||
/* set other variables */
|
|
||||||
ctx->buf_ptr = 0;
|
|
||||||
ctx->block_counter = 0;
|
|
||||||
ctx->bits_in_last_byte = 0;
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* update state with databitlen bits of input */
|
|
||||||
void grsnUpdate(grsnState* ctx,
|
|
||||||
const BitSequence* input,
|
|
||||||
DataLength databitlen) {
|
|
||||||
int index = 0;
|
|
||||||
int msglen = (int)(databitlen/8);
|
|
||||||
int rem = (int)(databitlen%8);
|
|
||||||
|
|
||||||
/* non-integral number of message bytes can only be supplied in the
|
|
||||||
last call to this function */
|
|
||||||
if (ctx->bits_in_last_byte) return;
|
|
||||||
|
|
||||||
/* if the buffer contains data that has not yet been digested, first
|
|
||||||
add data to buffer until full */
|
|
||||||
if (ctx->buf_ptr) {
|
|
||||||
while (ctx->buf_ptr < ctx->statesize && index < msglen) {
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
|
|
||||||
}
|
|
||||||
if (ctx->buf_ptr < ctx->statesize) {
|
|
||||||
/* buffer still not full, return */
|
|
||||||
if (rem) {
|
|
||||||
ctx->bits_in_last_byte = rem;
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest buffer */
|
|
||||||
ctx->buf_ptr = 0;
|
|
||||||
printf("error\n");
|
|
||||||
grsnTransform(ctx, ctx->buffer, ctx->statesize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest bulk of message */
|
|
||||||
grsnTransform(ctx, input+index, msglen-index);
|
|
||||||
index += ((msglen-index)/ctx->statesize)*ctx->statesize;
|
|
||||||
|
|
||||||
/* store remaining data in buffer */
|
|
||||||
while (index < msglen) {
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* if non-integral number of bytes have been supplied, store
|
|
||||||
remaining bits in last byte, together with information about
|
|
||||||
number of bits */
|
|
||||||
if (rem) {
|
|
||||||
ctx->bits_in_last_byte = rem;
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* update state with databitlen bits of input */
|
|
||||||
void grsnUpdateq(grsnState* ctx, const BitSequence* input)
|
|
||||||
{
|
|
||||||
int index = 0;
|
|
||||||
int msglen = (int)((64*8)/8);
|
|
||||||
int rem = (int)((64*8)%8);
|
|
||||||
|
|
||||||
/* if the buffer contains data that has not yet been digested, first
|
|
||||||
add data to buffer until full */
|
|
||||||
if (ctx->buf_ptr) {
|
|
||||||
while (ctx->buf_ptr < ctx->statesize && index < msglen) {
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
|
|
||||||
}
|
|
||||||
if (ctx->buf_ptr < ctx->statesize) {
|
|
||||||
/* buffer still not full, return */
|
|
||||||
if (rem) {
|
|
||||||
ctx->bits_in_last_byte = rem;
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest buffer */
|
|
||||||
ctx->buf_ptr = 0;
|
|
||||||
printf("error\n");
|
|
||||||
grsnTransform(ctx, ctx->buffer, ctx->statesize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest bulk of message */
|
|
||||||
grsnTransform(ctx, input+index, msglen-index);
|
|
||||||
index += ((msglen-index)/ctx->statesize)*ctx->statesize;
|
|
||||||
|
|
||||||
/* store remaining data in buffer */
|
|
||||||
while (index < msglen) {
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* if non-integral number of bytes have been supplied, store
|
|
||||||
remaining bits in last byte, together with information about
|
|
||||||
number of bits */
|
|
||||||
if (rem) {
|
|
||||||
ctx->bits_in_last_byte = rem;
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define BILB ctx->bits_in_last_byte
|
|
||||||
|
|
||||||
/* finalise: process remaining data (including padding), perform
|
|
||||||
output transformation, and write hash result to 'output' */
|
|
||||||
void grsnFinal(grsnState* ctx,
|
|
||||||
BitSequence* output) {
|
|
||||||
int i, j = 0, grsnbytelen = grsnLENGTH/8;
|
|
||||||
u8 *s = (BitSequence*)ctx->chaining;
|
|
||||||
|
|
||||||
/* pad with '1'-bit and first few '0'-bits */
|
|
||||||
if (BILB) {
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
|
|
||||||
BILB = 0;
|
|
||||||
}
|
|
||||||
else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
|
|
||||||
|
|
||||||
/* pad with '0'-bits */
|
|
||||||
if (ctx->buf_ptr > ctx->statesize-grsnLENGTHFIELDLEN) {
|
|
||||||
/* padding requires two blocks */
|
|
||||||
while (ctx->buf_ptr < ctx->statesize) {
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr++] = 0;
|
|
||||||
}
|
|
||||||
/* digest first padding block */
|
|
||||||
grsnTransform(ctx, ctx->buffer, ctx->statesize);
|
|
||||||
ctx->buf_ptr = 0;
|
|
||||||
}
|
|
||||||
while (ctx->buf_ptr < ctx->statesize-grsnLENGTHFIELDLEN) {
|
|
||||||
ctx->buffer[(int)ctx->buf_ptr++] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* length padding */
|
|
||||||
ctx->block_counter++;
|
|
||||||
ctx->buf_ptr = ctx->statesize;
|
|
||||||
while (ctx->buf_ptr > ctx->statesize-grsnLENGTHFIELDLEN) {
|
|
||||||
ctx->buffer[(int)--ctx->buf_ptr] = (u8)ctx->block_counter;
|
|
||||||
ctx->block_counter >>= 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest final padding block */
|
|
||||||
grsnTransform(ctx, ctx->buffer, ctx->statesize);
|
|
||||||
/* perform output transformation */
|
|
||||||
grsnOutputTransformation(ctx);
|
|
||||||
|
|
||||||
/* store hash result in output */
|
|
||||||
for (i = ctx->statesize-grsnbytelen; i < ctx->statesize; i++,j++) {
|
|
||||||
output[j] = s[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* zeroise relevant variables and deallocate memory */
|
|
||||||
|
|
||||||
for (i = 0; i < ctx->columns; i++) {
|
|
||||||
ctx->chaining[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ctx->statesize; i++) {
|
|
||||||
ctx->buffer[i] = 0;
|
|
||||||
}
|
|
||||||
// free(ctx->chaining);
|
|
||||||
// free(ctx->buffer);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
@@ -1,80 +0,0 @@
|
|||||||
/* hash.h Aug 2011
|
|
||||||
*
|
|
||||||
* Groestl implementation for different versions.
|
|
||||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
|
||||||
*
|
|
||||||
* This code is placed in the public domain
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __grsn_h
|
|
||||||
#define __grsn_h
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#include "brg_endian.h"
|
|
||||||
#define NEED_UINT_64T
|
|
||||||
#include "brg_types.h"
|
|
||||||
|
|
||||||
#ifndef grsnLENGTH
|
|
||||||
#define grsnLENGTH 512
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* some sizes (number of bytes) */
|
|
||||||
#define grsnROWS 8
|
|
||||||
#define grsnLENGTHFIELDLEN grsnROWS
|
|
||||||
#define grsnCOLS512 8
|
|
||||||
#define grsnCOLS1024 16
|
|
||||||
#define grsnSIZE512 (grsnROWS*grsnCOLS512)
|
|
||||||
#define grsnSIZE1024 (grsnROWS*grsnCOLS1024)
|
|
||||||
#define grsnROUNDS512 10
|
|
||||||
#define grsnROUNDS1024 14
|
|
||||||
|
|
||||||
#if grsnLENGTH<=256
|
|
||||||
#define grsnCOLS grsnCOLS512
|
|
||||||
#define grsnSIZE grsnSIZE512
|
|
||||||
#define grsnROUNDS grsnROUNDS512
|
|
||||||
#else
|
|
||||||
#define grsnCOLS grsnCOLS1024
|
|
||||||
#define grsnSIZE grsnSIZE1024
|
|
||||||
#define grsnROUNDS grsnROUNDS1024
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))
|
|
||||||
|
|
||||||
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
|
|
||||||
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
|
|
||||||
#define U64BIG(a) (a)
|
|
||||||
#endif /* IS_BIG_ENDIAN */
|
|
||||||
|
|
||||||
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
|
|
||||||
/* Extract byte n of var, counting from the least significant byte
   (little-endian hosts).  The index is parenthesised so that expression
   arguments such as i+1 are scaled as a whole. */
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(n))))
|
|
||||||
#define U64BIG(a) \
|
|
||||||
((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
|
|
||||||
(ROTL64(a,24) & li_64(0000FF000000FF00)) | \
|
|
||||||
(ROTL64(a,40) & li_64(00FF000000FF0000)) | \
|
|
||||||
(ROTL64(a,56) & li_64(FF000000FF000000)))
|
|
||||||
#endif /* IS_LITTLE_ENDIAN */
|
|
||||||
|
|
||||||
typedef enum { LONG, SHORT } Var;
|
|
||||||
|
|
||||||
/* NIST API begin */
|
|
||||||
typedef unsigned char BitSequence;
|
|
||||||
typedef unsigned long long DataLength;
|
|
||||||
typedef struct {
|
|
||||||
__attribute__ ((aligned (32))) u64 chaining[grsnSIZE/8]; /* actual state */
|
|
||||||
__attribute__ ((aligned (32))) BitSequence buffer[grsnSIZE]; /* data buffer */
|
|
||||||
u64 block_counter; /* message block counter */
|
|
||||||
int buf_ptr; /* data buffer pointer */
|
|
||||||
int bits_in_last_byte; /* no. of message bits in last byte of
|
|
||||||
data buffer */
|
|
||||||
int columns; /* no. of columns in state */
|
|
||||||
int statesize; /* total no. of bytes in state */
|
|
||||||
Var v; /* LONG or SHORT */
|
|
||||||
} grsnState;
|
|
||||||
|
|
||||||
void grsnInit(grsnState*);
|
|
||||||
void grsnUpdate(grsnState*, const BitSequence*, DataLength);
|
|
||||||
void grsnFinal(grsnState*, BitSequence*);
|
|
||||||
|
|
||||||
#endif /* __grsn_h */
|
|
File diff suppressed because it is too large
Load Diff
@@ -1,10 +0,0 @@
|
|||||||
#ifndef GRSOASM_H
|
|
||||||
#define GRSOASM_H
|
|
||||||
|
|
||||||
#include "grso.h"
|
|
||||||
|
|
||||||
void grsoP1024ASM (u64 *x) ;
|
|
||||||
|
|
||||||
void grsoQ1024ASM (u64 *x) ;
|
|
||||||
|
|
||||||
#endif
|
|
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
|||||||
#ifndef GRSOASM_H
|
|
||||||
#define GRSOASM_H
|
|
||||||
/* really same as the mmx asm.h */
|
|
||||||
/* made just in case something must be changed */
|
|
||||||
#include "grso.h"
|
|
||||||
|
|
||||||
void grsoP1024ASM (u64 *x) ;
|
|
||||||
|
|
||||||
void grsoQ1024ASM (u64 *x) ;
|
|
||||||
|
|
||||||
#endif
|
|
@@ -1,110 +0,0 @@
|
|||||||
/* hash.c January 2011
|
|
||||||
*
|
|
||||||
* Groestl-512 implementation with inline assembly containing mmx and
|
|
||||||
* sse instructions. Optimized for Opteron.
|
|
||||||
* Authors: Krystian Matusiewicz and Soeren S. Thomsen
|
|
||||||
*
|
|
||||||
* This code is placed in the public domain
|
|
||||||
*/
|
|
||||||
|
|
||||||
//#include "grso.h"
|
|
||||||
//#include "grso-asm.h"
|
|
||||||
// #include "grsotab.h"
|
|
||||||
|
|
||||||
#define DECL_GRS
|
|
||||||
|
|
||||||
/* load initial constants */
|
|
||||||
/* Reset the Groestl state object named sts_grs in the enclosing scope:
   every state column except the last is zeroed, the last column receives
   the digest size in bits passed through grsoU64BIG, and the buffer
   pointer and block counter are cleared.

   The expansion keeps its own trailing ';' so that existing uses of the
   macro keep compiling unchanged.  The line continuation that used to
   follow "while (0);" has been dropped: it spliced whatever source line
   came next into the macro body. */
#define GRS_I \
do { \
  int i; \
  /* set initial value */ \
  for (i = 0; i < grsoCOLS-1; i++) sts_grs.grsstate[i] = 0; \
  sts_grs.grsstate[grsoCOLS-1] = grsoU64BIG((u64)(8*grsoDIGESTSIZE)); \
  \
  /* set other variables */ \
  sts_grs.grsbuf_ptr = 0; \
  sts_grs.grsblock_counter = 0; \
} while (0);
|
|
||||||
|
|
||||||
/* load hash */
|
|
||||||
/* Absorb the 64 bytes at `hash` into the Groestl state `sts_grs`, using
   `hashbuf` to stage bytes that do not yet make up a full grsoSIZE-byte
   block.  `hash`, `hashbuf` and `sts_grs` are taken from the enclosing
   scope.  The expansion already ends in ';'. */
#define GRS_U \
do { \
  unsigned char* in = hash; \
  unsigned long long index = 0; \
  \
  /* bytes left over from an earlier update: top the buffer up first */ \
  if (sts_grs.grsbuf_ptr) { \
    for (; sts_grs.grsbuf_ptr < grsoSIZE && index < 64; index++) { \
      hashbuf[(int)sts_grs.grsbuf_ptr++] = in[index]; \
    } \
    /* still short of a full block: leave the do/while(0) early */ \
    if (sts_grs.grsbuf_ptr < grsoSIZE) break; \
    \
    /* buffer complete: digest it */ \
    sts_grs.grsbuf_ptr = 0; \
    grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
  } \
  \
  /* digest whatever whole blocks remain in the input */ \
  grsoTransform(&sts_grs, in+index, 64-index); \
  index += ((64-index)/grsoSIZE)*grsoSIZE; \
  \
  /* stage the undigested tail */ \
  for (; index < 64; index++) { \
    hashbuf[(int)sts_grs.grsbuf_ptr++] = in[index]; \
  } \
  \
} while (0);
|
|
||||||
|
|
||||||
/* groestl512 hash loaded */
|
|
||||||
/* hash = groestl512(loaded) */
|
|
||||||
/* Finalise the Groestl-512 computation held in `sts_grs` / `hashbuf` and
   write the grsoDIGESTSIZE-byte digest to `hash`.

   Steps: append 0x80, zero-pad (digesting an extra block when the length
   field no longer fits), write the block count into the last
   grsoLENGTHFIELDLEN bytes with the low byte last, digest, apply the
   output transformation, copy the tail of the state to `hash`, then zero
   the state and `hashbuf`.  The expansion already ends in ';'. */
#define GRS_C \
do { \
  char *out = hash; \
  int i, j = 0; \
  unsigned char *s = (unsigned char*)sts_grs.grsstate; \
  \
  /* the mandatory '1' bit followed by seven '0' bits */ \
  hashbuf[sts_grs.grsbuf_ptr++] = 0x80; \
  \
  /* length field does not fit: finish this block and start another */ \
  if (sts_grs.grsbuf_ptr > grsoSIZE-grsoLENGTHFIELDLEN) { \
    for (; sts_grs.grsbuf_ptr < grsoSIZE; sts_grs.grsbuf_ptr++) { \
      hashbuf[sts_grs.grsbuf_ptr] = 0; \
    } \
    grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
    sts_grs.grsbuf_ptr = 0; \
  } \
  /* zero-fill up to the length field */ \
  for (; sts_grs.grsbuf_ptr < grsoSIZE-grsoLENGTHFIELDLEN; sts_grs.grsbuf_ptr++) { \
    hashbuf[sts_grs.grsbuf_ptr] = 0; \
  } \
  \
  /* block count (this block included), filled in from the end backwards */ \
  sts_grs.grsblock_counter++; \
  for (sts_grs.grsbuf_ptr = grsoSIZE; \
       sts_grs.grsbuf_ptr > grsoSIZE-grsoLENGTHFIELDLEN; \
       sts_grs.grsblock_counter >>= 8) { \
    hashbuf[--sts_grs.grsbuf_ptr] = (unsigned char)sts_grs.grsblock_counter; \
  } \
  \
  /* last block, then h <- P(h)+h */ \
  grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
  grsoOutputTransformation(&sts_grs); \
  \
  /* digest = last grsoDIGESTSIZE bytes of the state */ \
  for (i = grsoSIZE-grsoDIGESTSIZE; i < grsoSIZE; i++) { \
    out[j++] = s[i]; \
  } \
  \
  /* wipe the state and the staging buffer */ \
  for (i = 0; i < grsoCOLS; i++) { \
    sts_grs.grsstate[i] = 0; \
  } \
  for (i = 0; i < grsoSIZE; i++) { \
    hashbuf[i] = 0; \
  } \
} while (0);
|
|
||||||
|
|
||||||
|
|
@@ -1,57 +0,0 @@
|
|||||||
/* hash.c January 2011
|
|
||||||
*
|
|
||||||
* Groestl-512 implementation with inline assembly containing mmx and
|
|
||||||
* sse instructions. Optimized for Opteron.
|
|
||||||
* Authors: Krystian Matusiewicz and Soeren S. Thomsen
|
|
||||||
*
|
|
||||||
* This code is placed in the public domain
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "algo/groestl/sse2/grso-asm.h"
|
|
||||||
#include "algo/groestl/sse2/grso.h"
|
|
||||||
#include "algo/groestl/sse2/grsotab.h"
|
|
||||||
|
|
||||||
/* digest up to len bytes of input (full blocks only) */
|
|
||||||
/* Digest as many whole grsoSIZE-byte blocks of `in` as `len` covers.
 *
 * For every block m the state h becomes h ^ Q(m) ^ P(h ^ m).  Trailing
 * bytes that do not fill a block are ignored.  The block counter is
 * advanced by len/grsoSIZE before any block is processed.
 *
 * NOTE(review): `in` is read through a u64 pointer, so this assumes the
 * caller passes suitably aligned data -- confirm against call sites.
 */
void grsoTransform(grsoState *ctx,
                   const unsigned char *in,
                   unsigned long long len) {
  u64 q_io[grsoCOLS+2] __attribute__ ((aligned (16)));
  u64 p_io[grsoCOLS+2] __attribute__ ((aligned (16)));
  u64 *state = (u64*)ctx->grsstate;
  int col;

  /* increment block counter */
  ctx->grsblock_counter += len/grsoSIZE;

  while (len >= grsoSIZE) {
    const u64 *msg = (const u64*)in;

    /* inputs of the two permutations: m and h ^ m */
    for (col = 0; col < grsoCOLS; col++) {
      q_io[col] = msg[col];
      p_io[col] = msg[col] ^ state[col];
    }

    grsoQ1024ASM(q_io);
    grsoP1024ASM(p_io);

    /* h' = h ^ Q(m) ^ P(h ^ m) */
    for (col = 0; col < grsoCOLS; col++) {
      state[col] ^= p_io[col] ^ q_io[col];
    }

    in += grsoSIZE;
    len -= grsoSIZE;
  }
}
|
|
||||||
|
|
||||||
/* given state h, do h <- P(h)+h */
|
|
||||||
/* Output transformation: replace the state h by P(h) ^ h.
 *
 * The permutation is run on an aligned scratch copy so that the original
 * state is still available for the final XOR.
 */
void grsoOutputTransformation(grsoState *ctx) {
  u64 perm[grsoCOLS] __attribute__ ((aligned (16)));
  int col;

  /* scratch copy of the state */
  for (col = 0; col < grsoCOLS; col++) {
    perm[col] = ctx->grsstate[col];
  }

  grsoP1024ASM(perm);

  /* fold the permuted copy back into the state */
  for (col = 0; col < grsoCOLS; col++) {
    ctx->grsstate[col] ^= perm[col];
  }
}
|
|
||||||
|
|
@@ -1,62 +0,0 @@
|
|||||||
#ifndef __hash_h
|
|
||||||
#define __hash_h
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include "brg_endian.h"
|
|
||||||
#include "brg_types.h"
|
|
||||||
|
|
||||||
/* some sizes (number of bytes) */
|
|
||||||
#define grsoROWS 8
|
|
||||||
#define grsoLENGTHFIELDLEN grsoROWS
|
|
||||||
#define grsoCOLS 16
|
|
||||||
#define grsoSIZE (grsoROWS*grsoCOLS)
|
|
||||||
#define grsoDIGESTSIZE 64
|
|
||||||
|
|
||||||
#define grsoROUNDS 14
|
|
||||||
|
|
||||||
#define grsoROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&((u64)0xffffffffffffffffULL))
|
|
||||||
|
|
||||||
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
|
|
||||||
#error
|
|
||||||
#endif /* IS_BIG_ENDIAN */
|
|
||||||
|
|
||||||
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
|
|
||||||
/* Extract byte n of var, counting from the least significant byte
   (little-endian hosts).  The index is parenthesised so that expression
   arguments such as i+1 are scaled as a whole. */
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(n))))
|
|
||||||
#define grsoU64BIG(a) \
|
|
||||||
((grsoROTL64(a, 8) & ((u64)0x000000ff000000ffULL)) | \
|
|
||||||
(grsoROTL64(a,24) & ((u64)0x0000ff000000ff00ULL)) | \
|
|
||||||
(grsoROTL64(a,40) & ((u64)0x00ff000000ff0000ULL)) | \
|
|
||||||
(grsoROTL64(a,56) & ((u64)0xff000000ff000000ULL)))
|
|
||||||
#endif /* IS_LITTLE_ENDIAN */
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
u64 grsstate[grsoCOLS]; /* actual state */
|
|
||||||
u64 grsblock_counter; /* message block counter */
|
|
||||||
int grsbuf_ptr; /* data buffer pointer */
|
|
||||||
} grsoState;
|
|
||||||
|
|
||||||
//extern int grsoInit(grsoState* ctx);
|
|
||||||
//extern int grsoUpdate(grsoState* ctx, const unsigned char* in,
|
|
||||||
// unsigned long long len);
|
|
||||||
//extern int grsoUpdateq(grsoState* ctx, const unsigned char* in);
|
|
||||||
//extern int grsoFinal(grsoState* ctx,
|
|
||||||
// unsigned char* out);
|
|
||||||
//
|
|
||||||
//extern int grsohash(unsigned char *out,
|
|
||||||
// const unsigned char *in,
|
|
||||||
// unsigned long long len);
|
|
||||||
|
|
||||||
/* digest up to len bytes of input (full blocks only) */
|
|
||||||
void grsoTransform( grsoState *ctx, const unsigned char *in,
|
|
||||||
unsigned long long len );
|
|
||||||
|
|
||||||
/* given state h, do h <- P(h)+h */
|
|
||||||
void grsoOutputTransformation( grsoState *ctx );
|
|
||||||
|
|
||||||
int grso_init ( grsoState* sts_grs );
|
|
||||||
int grso_update ( grsoState* sts_grs, char* hashbuf, char* hash );
|
|
||||||
int grso_close ( grsoState *sts_grs, char* hashbuf, char* hash );
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* __hash_h */
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@@ -1,45 +0,0 @@
|
|||||||
/*
|
|
||||||
* file : hash_api.h
|
|
||||||
* version : 1.0.208
|
|
||||||
* date : 14.12.2010
|
|
||||||
*
|
|
||||||
* Grostl multi-stream bitsliced implementation Hash API
|
|
||||||
*
|
|
||||||
* Cagdas Calik
|
|
||||||
* ccalik@metu.edu.tr
|
|
||||||
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef GRSS_API_H
|
|
||||||
#define GRSS_API_H
|
|
||||||
|
|
||||||
#include "sha3_common.h"
|
|
||||||
#include <tmmintrin.h>
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
__m128i state1[8];
|
|
||||||
__m128i state2[8];
|
|
||||||
__m128i state3[8];
|
|
||||||
__m128i state4[8];
|
|
||||||
|
|
||||||
__m128i _Pconst[14][8];
|
|
||||||
__m128i _Qconst[14][8];
|
|
||||||
__m128i _shiftconst[8];
|
|
||||||
|
|
||||||
unsigned int uHashLength;
|
|
||||||
unsigned int uBlockLength;
|
|
||||||
|
|
||||||
BitSequence buffer[128];
|
|
||||||
|
|
||||||
} grssState;
|
|
||||||
|
|
||||||
void grssInit(grssState *state, int grssbitlen);
|
|
||||||
|
|
||||||
void grssUpdate(grssState *state, const BitSequence *data, DataLength databitlen);
|
|
||||||
|
|
||||||
void grssFinal(grssState *state, BitSequence *grssval);
|
|
||||||
|
|
||||||
#endif // GRSS_API_H
|
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@@ -1,202 +0,0 @@
|
|||||||
/* hash.c Aug 2011
|
|
||||||
*
|
|
||||||
* Groestl implementation for different versions.
|
|
||||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
|
||||||
*
|
|
||||||
* This code is placed in the public domain
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#include "grsv.h"
|
|
||||||
#include "grsv-asm.h"
|
|
||||||
|
|
||||||
/* digest up to len bytes of input (full blocks only) */
|
|
||||||
void grsvTransform(grsvState *ctx,
|
|
||||||
const u8 *in,
|
|
||||||
unsigned long long len) {
|
|
||||||
|
|
||||||
/* increment block counter */
|
|
||||||
ctx->grsvblock_counter += len/grsvSIZE;
|
|
||||||
|
|
||||||
/* digest message, one block at a time */
|
|
||||||
for (; len >= grsvSIZE; len -= grsvSIZE, in += grsvSIZE)
|
|
||||||
#if grsvLENGTH<=256
|
|
||||||
grsvTF512((u64*)ctx->grsvchaining, (u64*)in);
|
|
||||||
#else
|
|
||||||
grsvTF1024((u64*)ctx->grsvchaining, (u64*)in);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
asm volatile ("emms");
|
|
||||||
}
|
|
||||||
|
|
||||||
/* given state h, do h <- P(h)+h */
|
|
||||||
/* Output transformation on the chaining state (h <- P(h)+h), delegated to
 * the routine matching the compile-time digest length, followed by an
 * x86 EMMS instruction.
 */
void grsvOutputTransformation(grsvState *ctx) {

  /* pick the variant: wide state for digests above 256 bits */
#if (grsvLENGTH > 256)
  grsvOF1024((u64*)ctx->grsvchaining);
#else
  grsvOF512((u64*)ctx->grsvchaining);
#endif

  asm volatile ("emms");
}
|
|
||||||
|
|
||||||
/* initialise context */
|
|
||||||
/* Initialise a hashing context for a grsvLENGTH-bit digest.
 *
 * Sets the state geometry (columns, state size, LONG/SHORT variant), runs
 * SET_CONSTANTS(), zeroes the chaining state and data buffer, stores the
 * digest length (via U64BIG) in the last state column, calls grsvINIT on
 * the state and resets the buffer pointer, block counter and partial-byte
 * count.
 *
 * Returns without touching ctx when grsvLENGTH is not a positive multiple
 * of 8 no larger than 512.
 *
 * Changes from the previous version:
 *  - the loop counter is an int instead of a u8, so the zeroing loops do
 *    not depend on grsvSIZE staying below 256;
 *  - the comparison of ctx->grsvchaining / ctx->grsvbuffer against NULL
 *    was removed: both are arrays embedded in the context, so the test
 *    could never be true, and it ran only after both had been written.
 */
void grsvInit(grsvState* ctx) {
  int i;

  /* output size (in bits) must be a positive integer less than or
     equal to 512, and divisible by 8 */
  if (grsvLENGTH <= 0 || (grsvLENGTH%8) || grsvLENGTH > 512)
    return;

  /* set number of state columns and state size depending on
     variant */
  ctx->grsvcolumns = grsvCOLS;
  ctx->grsvstatesize = grsvSIZE;
#if (grsvLENGTH <= 256)
  ctx->grsvv = SHORT;
#else
  ctx->grsvv = LONG;
#endif

  SET_CONSTANTS();

  /* start from an all-zero state and an empty buffer */
  for (i = 0; i < grsvSIZE/8; i++)
    ctx->grsvchaining[i] = 0;
  for (i = 0; i < grsvSIZE; i++)
    ctx->grsvbuffer[i] = 0;

  /* set initial value: digest length in the last column */
  ctx->grsvchaining[ctx->grsvcolumns-1] = U64BIG((u64)grsvLENGTH);

  grsvINIT(ctx->grsvchaining);

  /* set other variables */
  ctx->grsvbuf_ptr = 0;
  ctx->grsvblock_counter = 0;
  ctx->grsvbits_in_last_byte = 0;
}
|
|
||||||
|
|
||||||
/* update state with databitlen bits of input */
|
|
||||||
void grsvUpdate(grsvState* ctx,
|
|
||||||
const grsvBitSequence* input,
|
|
||||||
grsvDataLength databitlen) {
|
|
||||||
int index = 0;
|
|
||||||
int msglen = (int)(databitlen/8);
|
|
||||||
int rem = (int)(databitlen%8);
|
|
||||||
|
|
||||||
/* non-integral number of message bytes can only be supplied in the
|
|
||||||
last call to this function */
|
|
||||||
if (ctx->grsvbits_in_last_byte) return;
|
|
||||||
|
|
||||||
/* if the buffer contains data that has not yet been digested, first
|
|
||||||
add data to buffer until full */
|
|
||||||
if (ctx->grsvbuf_ptr) {
|
|
||||||
while (ctx->grsvbuf_ptr < ctx->grsvstatesize && index < msglen) {
|
|
||||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index++];
|
|
||||||
}
|
|
||||||
if (ctx->grsvbuf_ptr < ctx->grsvstatesize) {
|
|
||||||
/* buffer still not full, return */
|
|
||||||
if (rem) {
|
|
||||||
ctx->grsvbits_in_last_byte = rem;
|
|
||||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index];
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest buffer */
|
|
||||||
ctx->grsvbuf_ptr = 0;
|
|
||||||
printf("error\n");
|
|
||||||
grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest bulk of message */
|
|
||||||
grsvTransform(ctx, input+index, msglen-index);
|
|
||||||
index += ((msglen-index)/ctx->grsvstatesize)*ctx->grsvstatesize;
|
|
||||||
|
|
||||||
/* store remaining data in buffer */
|
|
||||||
while (index < msglen) {
|
|
||||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index++];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* if non-integral number of bytes have been supplied, store
|
|
||||||
remaining bits in last byte, together with information about
|
|
||||||
number of bits */
|
|
||||||
if (rem) {
|
|
||||||
ctx->grsvbits_in_last_byte = rem;
|
|
||||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index];
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define BILB ctx->grsvbits_in_last_byte
|
|
||||||
|
|
||||||
/* finalise: process remaining data (including padding), perform
|
|
||||||
output transformation, and write hash result to 'output' */
|
|
||||||
void grsvFinal(grsvState* ctx,
|
|
||||||
grsvBitSequence* output) {
|
|
||||||
int i, j = 0, grsvbytelen = grsvLENGTH/8;
|
|
||||||
u8 *s = (grsvBitSequence*)ctx->grsvchaining;
|
|
||||||
|
|
||||||
/* pad with '1'-bit and first few '0'-bits */
|
|
||||||
if (BILB) {
|
|
||||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
|
|
||||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr-1] ^= 0x1<<(7-BILB);
|
|
||||||
BILB = 0;
|
|
||||||
}
|
|
||||||
else ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0x80;
|
|
||||||
|
|
||||||
/* pad with '0'-bits */
|
|
||||||
if (ctx->grsvbuf_ptr > ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
|
|
||||||
/* padding requires two blocks */
|
|
||||||
while (ctx->grsvbuf_ptr < ctx->grsvstatesize) {
|
|
||||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0;
|
|
||||||
}
|
|
||||||
/* digest first padding block */
|
|
||||||
grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
|
|
||||||
ctx->grsvbuf_ptr = 0;
|
|
||||||
}
|
|
||||||
while (ctx->grsvbuf_ptr < ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
|
|
||||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* length padding */
|
|
||||||
ctx->grsvblock_counter++;
|
|
||||||
ctx->grsvbuf_ptr = ctx->grsvstatesize;
|
|
||||||
while (ctx->grsvbuf_ptr > ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
|
|
||||||
ctx->grsvbuffer[(int)--ctx->grsvbuf_ptr] = (u8)ctx->grsvblock_counter;
|
|
||||||
ctx->grsvblock_counter >>= 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* digest final padding block */
|
|
||||||
grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
|
|
||||||
/* perform output transformation */
|
|
||||||
grsvOutputTransformation(ctx);
|
|
||||||
|
|
||||||
/* store hash result in output */
|
|
||||||
for (i = ctx->grsvstatesize-grsvbytelen; i < ctx->grsvstatesize; i++,j++) {
|
|
||||||
output[j] = s[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* zeroise relevant variables and deallocate memory */
|
|
||||||
|
|
||||||
for (i = 0; i < ctx->grsvcolumns; i++) {
|
|
||||||
ctx->grsvchaining[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ctx->grsvstatesize; i++) {
|
|
||||||
ctx->grsvbuffer[i] = 0;
|
|
||||||
}
|
|
||||||
// free(ctx->grsvchaining);
|
|
||||||
// free(ctx->buffer);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
@@ -1,77 +0,0 @@
|
|||||||
/* hash.h Aug 2011
|
|
||||||
*
|
|
||||||
* Groestl implementation for different versions.
|
|
||||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
|
||||||
*
|
|
||||||
* This code is placed in the public domain
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __grsv_h
|
|
||||||
#define __grsv_h
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#include "brg_endian.h"
|
|
||||||
#define NEED_UINT_64T
|
|
||||||
#include "brg_types.h"
|
|
||||||
|
|
||||||
#define grsvLENGTH 512
|
|
||||||
|
|
||||||
/* some sizes (number of bytes) */
|
|
||||||
#define grsvROWS 8
|
|
||||||
#define grsvLENGTHFIELDLEN grsvROWS
|
|
||||||
#define grsvCOLS512 8
|
|
||||||
#define grsvCOLS1024 16
|
|
||||||
#define grsvSIZE512 (grsvROWS*grsvCOLS512)
|
|
||||||
#define grsvSIZE1024 (grsvROWS*grsvCOLS1024)
|
|
||||||
#define grsvROUNDS512 10
|
|
||||||
#define grsvROUNDS1024 14
|
|
||||||
|
|
||||||
#if grsvLENGTH<=256
|
|
||||||
#define grsvCOLS grsvCOLS512
|
|
||||||
#define grsvSIZE grsvSIZE512
|
|
||||||
#define grsvROUNDS grsvROUNDS512
|
|
||||||
#else
|
|
||||||
#define grsvCOLS grsvCOLS1024
|
|
||||||
#define grsvSIZE grsvSIZE1024
|
|
||||||
#define grsvROUNDS grsvROUNDS1024
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))
|
|
||||||
|
|
||||||
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
|
|
||||||
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
|
|
||||||
#define U64BIG(a) (a)
|
|
||||||
#endif /* IS_BIG_ENDIAN */
|
|
||||||
|
|
||||||
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
|
|
||||||
/* Extract byte n of var, counting from the least significant byte
   (little-endian hosts).  The index is parenthesised so that expression
   arguments such as i+1 are scaled as a whole. */
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(n))))
|
|
||||||
#define U64BIG(a) \
|
|
||||||
((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
|
|
||||||
(ROTL64(a,24) & li_64(0000FF000000FF00)) | \
|
|
||||||
(ROTL64(a,40) & li_64(00FF000000FF0000)) | \
|
|
||||||
(ROTL64(a,56) & li_64(FF000000FF000000)))
|
|
||||||
#endif /* IS_LITTLE_ENDIAN */
|
|
||||||
|
|
||||||
typedef enum { LONG, SHORT } grsvVar;
|
|
||||||
|
|
||||||
typedef unsigned char grsvBitSequence;
|
|
||||||
typedef unsigned long long grsvDataLength;
|
|
||||||
typedef struct {
|
|
||||||
__attribute__ ((aligned (32))) u64 grsvchaining[grsvSIZE/8]; /* actual state */
|
|
||||||
__attribute__ ((aligned (32))) grsvBitSequence grsvbuffer[grsvSIZE]; /* data buffer */
|
|
||||||
u64 grsvblock_counter; /* message block counter */
|
|
||||||
int grsvbuf_ptr; /* data buffer pointer */
|
|
||||||
int grsvbits_in_last_byte; /* no. of message bits in last byte of
|
|
||||||
data buffer */
|
|
||||||
int grsvcolumns; /* no. of columns in state */
|
|
||||||
int grsvstatesize; /* total no. of bytes in state */
|
|
||||||
grsvVar grsvv; /* LONG or SHORT */
|
|
||||||
} grsvState;
|
|
||||||
|
|
||||||
void grsvInit(grsvState*);
|
|
||||||
void grsvUpdate(grsvState*, const grsvBitSequence*, grsvDataLength);
|
|
||||||
void grsvFinal(grsvState*, grsvBitSequence*);
|
|
||||||
|
|
||||||
#endif /* __grsv_h */
|
|
124
algo/hmq1725.c
124
algo/hmq1725.c
@@ -23,10 +23,7 @@
|
|||||||
#include "algo/sha2/sph-sha2.h"
|
#include "algo/sha2/sph-sha2.h"
|
||||||
#include "algo/haval/sph-haval.h"
|
#include "algo/haval/sph-haval.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
#include "algo/groestl/sse2/grso.h"
|
|
||||||
#include "algo/groestl/sse2/grso-macro.c"
|
|
||||||
#else
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -34,38 +31,31 @@
|
|||||||
#include "algo/luffa/sse2/luffa_for_sse2.h"
|
#include "algo/luffa/sse2/luffa_for_sse2.h"
|
||||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||||
#include "algo/simd/sse2/nist.h"
|
#include "algo/simd/sse2/nist.h"
|
||||||
//#include "algo/blake/sse2/blake.c"
|
|
||||||
//#include "algo/keccak/sse2/keccak.c"
|
|
||||||
//#include "algo/bmw/sse2/bmw.c"
|
|
||||||
//#include "algo/skein/sse2/skein.c"
|
|
||||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
sph_blake512_context blake1, blake2;
|
sph_blake512_context blake1, blake2;
|
||||||
sph_bmw512_context bmw1, bmw2, bmw3;
|
sph_bmw512_context bmw1, bmw2, bmw3;
|
||||||
sph_skein512_context skein1, skein2;
|
sph_skein512_context skein1, skein2;
|
||||||
sph_jh512_context jh1, jh2;
|
sph_jh512_context jh1, jh2;
|
||||||
sph_keccak512_context keccak1, keccak2;
|
sph_keccak512_context keccak1, keccak2;
|
||||||
// sph_luffa512_context luffa1, luffa2;
|
hashState_luffa luffa1, luffa2;
|
||||||
hashState_luffa luffa1, luffa2;
|
cubehashParam cube;
|
||||||
// sph_cubehash512_context cube1, cube2;
|
sph_shavite512_context shavite1, shavite2;
|
||||||
cubehashParam cube;
|
hashState_sd simd1, simd2;
|
||||||
sph_shavite512_context shavite1, shavite2;
|
sph_hamsi512_context hamsi1;
|
||||||
// sph_simd512_context simd1, simd2;
|
sph_fugue512_context fugue1, fugue2;
|
||||||
hashState_sd simd1, simd2;
|
sph_shabal512_context shabal1;
|
||||||
sph_hamsi512_context hamsi1;
|
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
|
||||||
sph_fugue512_context fugue1, fugue2;
|
sph_sha512_context sha1, sha2;
|
||||||
sph_shabal512_context shabal1;
|
sph_haval256_5_context haval1, haval2;
|
||||||
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
|
|
||||||
sph_sha512_context sha1, sha2;
|
|
||||||
sph_haval256_5_context haval1, haval2;
|
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
sph_groestl512_context groestl1, groestl2;
|
sph_groestl512_context groestl1, groestl2;
|
||||||
sph_echo512_context echo1, echo2;
|
sph_echo512_context echo1, echo2;
|
||||||
#else
|
#else
|
||||||
hashState_echo echo1, echo2;
|
hashState_echo echo1, echo2;
|
||||||
hashState_groestl groestl1, groestl2;
|
hashState_groestl groestl1, groestl2;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
} hmq1725_ctx_holder;
|
} hmq1725_ctx_holder;
|
||||||
@@ -90,19 +80,14 @@ void init_hmq1725_ctx()
|
|||||||
sph_keccak512_init(&hmq1725_ctx.keccak1);
|
sph_keccak512_init(&hmq1725_ctx.keccak1);
|
||||||
sph_keccak512_init(&hmq1725_ctx.keccak2);
|
sph_keccak512_init(&hmq1725_ctx.keccak2);
|
||||||
|
|
||||||
// sph_luffa512_init(&hmq1725_ctx.luffa1);
|
|
||||||
// sph_luffa512_init(&hmq1725_ctx.luffa2);
|
|
||||||
init_luffa( &hmq1725_ctx.luffa1, 512 );
|
init_luffa( &hmq1725_ctx.luffa1, 512 );
|
||||||
init_luffa( &hmq1725_ctx.luffa2, 512 );
|
init_luffa( &hmq1725_ctx.luffa2, 512 );
|
||||||
|
|
||||||
// sph_cubehash512_init(&hmq1725_ctx.cubehash1);
|
|
||||||
cubehashInit( &hmq1725_ctx.cube, 512, 16, 32 );
|
cubehashInit( &hmq1725_ctx.cube, 512, 16, 32 );
|
||||||
|
|
||||||
sph_shavite512_init(&hmq1725_ctx.shavite1);
|
sph_shavite512_init(&hmq1725_ctx.shavite1);
|
||||||
sph_shavite512_init(&hmq1725_ctx.shavite2);
|
sph_shavite512_init(&hmq1725_ctx.shavite2);
|
||||||
|
|
||||||
// sph_simd512_init(&hmq1725_ctx.simd1);
|
|
||||||
// sph_simd512_init(&hmq1725_ctx.simd2);
|
|
||||||
init_sd( &hmq1725_ctx.simd1, 512 );
|
init_sd( &hmq1725_ctx.simd1, 512 );
|
||||||
init_sd( &hmq1725_ctx.simd2, 512 );
|
init_sd( &hmq1725_ctx.simd2, 512 );
|
||||||
|
|
||||||
@@ -135,46 +120,18 @@ void init_hmq1725_ctx()
|
|||||||
init_groestl( &hmq1725_ctx.groestl1 );
|
init_groestl( &hmq1725_ctx.groestl1 );
|
||||||
init_groestl( &hmq1725_ctx.groestl2 );
|
init_groestl( &hmq1725_ctx.groestl2 );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
extern void hmq1725hash(void *state, const void *input)
|
extern void hmq1725hash(void *state, const void *input)
|
||||||
{
|
{
|
||||||
hmq1725_ctx_holder ctx;
|
|
||||||
memcpy(&ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
|
|
||||||
|
|
||||||
size_t hashptr;
|
|
||||||
// DATA_ALIGNXY(sph_u64 hashctA,8);
|
|
||||||
// DATA_ALIGNXY(sph_u64 hashctB,8);
|
|
||||||
|
|
||||||
// DATA_ALIGNXY(unsigned char hash[128],16);
|
|
||||||
unsigned char hashbuf[128];
|
|
||||||
sph_u64 hashctA;
|
|
||||||
sph_u64 hashctB;
|
|
||||||
|
|
||||||
const uint32_t mask = 24;
|
const uint32_t mask = 24;
|
||||||
|
uint32_t hashA[25], hashB[25];
|
||||||
|
hmq1725_ctx_holder ctx;
|
||||||
|
|
||||||
//these uint512 in the c++ source of the client are backed by an array of uint32
|
memcpy(&ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
|
||||||
uint32_t hashA[25], hashB[25];
|
|
||||||
|
|
||||||
// unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
|
|
||||||
// #define hashA hash
|
|
||||||
// #define hashB (hash+64)
|
|
||||||
|
|
||||||
sph_bmw512 (&ctx.bmw1, input, 80); //0
|
sph_bmw512 (&ctx.bmw1, input, 80); //0
|
||||||
sph_bmw512_close(&ctx.bmw1, hashA); //1
|
sph_bmw512_close(&ctx.bmw1, hashA); //1
|
||||||
/*
|
|
||||||
DECL_BMW;
|
|
||||||
BMW_I;
|
|
||||||
BMW_U;
|
|
||||||
#define M(x) sph_dec64le_aligned(data + 8 * (x))
|
|
||||||
#define H(x) (h[x])
|
|
||||||
#define dH(x) (dh[x])
|
|
||||||
BMW_C;
|
|
||||||
#undef M
|
|
||||||
#undef H
|
|
||||||
#undef dH
|
|
||||||
*/
|
|
||||||
|
|
||||||
sph_whirlpool (&ctx.whirlpool1, hashA, 64); //0
|
sph_whirlpool (&ctx.whirlpool1, hashA, 64); //0
|
||||||
sph_whirlpool_close(&ctx.whirlpool1, hashB); //1
|
sph_whirlpool_close(&ctx.whirlpool1, hashB); //1
|
||||||
@@ -182,8 +139,8 @@ extern void hmq1725hash(void *state, const void *input)
|
|||||||
if ( hashB[0] & mask ) //1
|
if ( hashB[0] & mask ) //1
|
||||||
{
|
{
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
sph_groestl512 (&ctx.groestl1, hashB, 64); //1
|
sph_groestl512 (&ctx.groestl1, hashB, 64); //1
|
||||||
sph_groestl512_close(&ctx.groestl1, hashA); //2
|
sph_groestl512_close(&ctx.groestl1, hashA); //2
|
||||||
#else
|
#else
|
||||||
update_groestl( &ctx.groestl1, (char*)hashB, 512 );
|
update_groestl( &ctx.groestl1, (char*)hashB, 512 );
|
||||||
final_groestl( &ctx.groestl1, (char*)hashA );
|
final_groestl( &ctx.groestl1, (char*)hashA );
|
||||||
@@ -191,8 +148,8 @@ extern void hmq1725hash(void *state, const void *input)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sph_skein512 (&ctx.skein1, hashB, 64); //1
|
sph_skein512 (&ctx.skein1, hashB, 64); //1
|
||||||
sph_skein512_close(&ctx.skein1, hashA); //2
|
sph_skein512_close(&ctx.skein1, hashA); //2
|
||||||
}
|
}
|
||||||
|
|
||||||
sph_jh512 (&ctx.jh1, hashA, 64); //3
|
sph_jh512 (&ctx.jh1, hashA, 64); //3
|
||||||
@@ -212,13 +169,9 @@ extern void hmq1725hash(void *state, const void *input)
|
|||||||
sph_bmw512_close(&ctx.bmw2, hashB); //5
|
sph_bmw512_close(&ctx.bmw2, hashB); //5
|
||||||
}
|
}
|
||||||
|
|
||||||
// sph_luffa512 (&ctx.luffa1, hashB, 64); //5
|
|
||||||
// sph_luffa512_close(&ctx.luffa1, hashA); //6
|
|
||||||
update_luffa( &ctx.luffa1, (BitSequence*)hashB, 512 );
|
update_luffa( &ctx.luffa1, (BitSequence*)hashB, 512 );
|
||||||
final_luffa( &ctx.luffa1, (BitSequence*)hashA );
|
final_luffa( &ctx.luffa1, (BitSequence*)hashA );
|
||||||
|
|
||||||
// sph_cubehash512 (&ctx.cubehash1, hashA, 64); //6
|
|
||||||
// sph_cubehash512_close(&ctx.cubehash1, hashB); //7
|
|
||||||
cubehashUpdate( &ctx.cube, (BitSequence *)hashA, 64 );
|
cubehashUpdate( &ctx.cube, (BitSequence *)hashA, 64 );
|
||||||
cubehashDigest( &ctx.cube, (BitSequence *)hashB );
|
cubehashDigest( &ctx.cube, (BitSequence *)hashB );
|
||||||
|
|
||||||
@@ -233,14 +186,11 @@ extern void hmq1725hash(void *state, const void *input)
|
|||||||
sph_jh512_close(&ctx.jh2, hashA); //8
|
sph_jh512_close(&ctx.jh2, hashA); //8
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
sph_shavite512 (&ctx.shavite1, hashA, 64); //3
|
sph_shavite512 (&ctx.shavite1, hashA, 64); //3
|
||||||
sph_shavite512_close(&ctx.shavite1, hashB); //4
|
sph_shavite512_close(&ctx.shavite1, hashB); //4
|
||||||
|
|
||||||
// sph_simd512 (&ctx.simd1, hashB, 64); //2
|
update_sd( &ctx.simd1, (BitSequence *)hashB, 512 );
|
||||||
// sph_simd512_close(&ctx.simd1, hashA); //3
|
final_sd( &ctx.simd1, (BitSequence *)hashA );
|
||||||
update_sd( &ctx.simd1, (BitSequence *)hashB, 512 );
|
|
||||||
final_sd( &ctx.simd1, (BitSequence *)hashA );
|
|
||||||
|
|
||||||
if ( hashA[0] & mask ) //4
|
if ( hashA[0] & mask ) //4
|
||||||
{
|
{
|
||||||
@@ -258,8 +208,8 @@ extern void hmq1725hash(void *state, const void *input)
|
|||||||
sph_echo512 (&ctx.echo1, hashB, 64); //5
|
sph_echo512 (&ctx.echo1, hashB, 64); //5
|
||||||
sph_echo512_close(&ctx.echo1, hashA); //6
|
sph_echo512_close(&ctx.echo1, hashA); //6
|
||||||
#else
|
#else
|
||||||
update_echo ( &ctx.echo1, (BitSequence *)hashB, 512 );
|
update_echo ( &ctx.echo1, (BitSequence *)hashB, 512 );
|
||||||
final_echo( &ctx.echo1, (BitSequence *)hashA );
|
final_echo( &ctx.echo1, (BitSequence *)hashA );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
sph_blake512 (&ctx.blake2, hashA, 64); //6
|
sph_blake512 (&ctx.blake2, hashA, 64); //6
|
||||||
@@ -272,8 +222,6 @@ extern void hmq1725hash(void *state, const void *input)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// sph_luffa512 (&ctx.luffa2, hashB, 64); //7
|
|
||||||
// sph_luffa512_close(&ctx.luffa2, hashA); //8
|
|
||||||
update_luffa( &ctx.luffa2, (BitSequence *)hashB, 512 );
|
update_luffa( &ctx.luffa2, (BitSequence *)hashB, 512 );
|
||||||
final_luffa( &ctx.luffa2, (BitSequence *)hashA );
|
final_luffa( &ctx.luffa2, (BitSequence *)hashA );
|
||||||
}
|
}
|
||||||
@@ -287,8 +235,8 @@ extern void hmq1725hash(void *state, const void *input)
|
|||||||
if ( hashA[0] & mask ) //4
|
if ( hashA[0] & mask ) //4
|
||||||
{
|
{
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
sph_echo512 (&ctx.echo2, hashA, 64); //
|
sph_echo512 (&ctx.echo2, hashA, 64); //
|
||||||
sph_echo512_close(&ctx.echo2, hashB); //5
|
sph_echo512_close(&ctx.echo2, hashB); //5
|
||||||
#else
|
#else
|
||||||
update_echo ( &ctx.echo2, (BitSequence *)hashA, 512 );
|
update_echo ( &ctx.echo2, (BitSequence *)hashA, 512 );
|
||||||
final_echo( &ctx.echo2, (BitSequence *)hashB );
|
final_echo( &ctx.echo2, (BitSequence *)hashB );
|
||||||
@@ -296,8 +244,6 @@ extern void hmq1725hash(void *state, const void *input)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// sph_simd512 (&ctx.simd2, hashA, 64); //4
|
|
||||||
// sph_simd512_close(&ctx.simd2, hashB); //5
|
|
||||||
update_sd( &ctx.simd2, (BitSequence *)hashA, 512 );
|
update_sd( &ctx.simd2, (BitSequence *)hashA, 512 );
|
||||||
final_sd( &ctx.simd2, (BitSequence *)hashB );
|
final_sd( &ctx.simd2, (BitSequence *)hashB );
|
||||||
}
|
}
|
||||||
@@ -323,8 +269,8 @@ extern void hmq1725hash(void *state, const void *input)
|
|||||||
sph_groestl512 (&ctx.groestl2, hashA, 64); //3
|
sph_groestl512 (&ctx.groestl2, hashA, 64); //3
|
||||||
sph_groestl512_close(&ctx.groestl2, hashB); //4
|
sph_groestl512_close(&ctx.groestl2, hashB); //4
|
||||||
#else
|
#else
|
||||||
update_groestl( &ctx.groestl2, (char*)hashA, 512 );
|
update_groestl( &ctx.groestl2, (char*)hashA, 512 );
|
||||||
final_groestl( &ctx.groestl2, (char*)hashB );
|
final_groestl( &ctx.groestl2, (char*)hashB );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
sph_sha512 (&ctx.sha2, hashB, 64); //2
|
sph_sha512 (&ctx.sha2, hashB, 64); //2
|
||||||
|
23
algo/nist5.c
23
algo/nist5.c
@@ -7,6 +7,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "algo/blake/sph_blake.h"
|
#include "algo/blake/sph_blake.h"
|
||||||
|
#include "algo/groestl/sph_groestl.h"
|
||||||
#include "algo/skein/sph_skein.h"
|
#include "algo/skein/sph_skein.h"
|
||||||
#include "algo/jh/sph_jh.h"
|
#include "algo/jh/sph_jh.h"
|
||||||
#include "algo/keccak/sph_keccak.h"
|
#include "algo/keccak/sph_keccak.h"
|
||||||
@@ -16,15 +17,14 @@
|
|||||||
#include "algo/skein/sse2/skein.c"
|
#include "algo/skein/sse2/skein.c"
|
||||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
#include "algo/groestl/sse2/grso.h"
|
|
||||||
#include "algo/groestl/sse2/grso-macro.c"
|
|
||||||
#else
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
#ifndef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
|
sph_groestl512_context groestl;
|
||||||
|
#else
|
||||||
hashState_groestl groestl;
|
hashState_groestl groestl;
|
||||||
#endif
|
#endif
|
||||||
} nist5_ctx_holder;
|
} nist5_ctx_holder;
|
||||||
@@ -33,16 +33,15 @@ nist5_ctx_holder nist5_ctx;
|
|||||||
|
|
||||||
void init_nist5_ctx()
|
void init_nist5_ctx()
|
||||||
{
|
{
|
||||||
#ifndef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
|
sph_groestl512_init( &nist5_ctx.groestl );
|
||||||
|
#else
|
||||||
init_groestl( &nist5_ctx.groestl );
|
init_groestl( &nist5_ctx.groestl );
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void nist5hash(void *output, const void *input)
|
void nist5hash(void *output, const void *input)
|
||||||
{
|
{
|
||||||
#ifdef NO_AES_NI
|
|
||||||
grsoState sts_grs;
|
|
||||||
#endif
|
|
||||||
size_t hashptr;
|
size_t hashptr;
|
||||||
unsigned char hashbuf[128];
|
unsigned char hashbuf[128];
|
||||||
sph_u64 hashctA;
|
sph_u64 hashctA;
|
||||||
@@ -54,16 +53,14 @@ void nist5hash(void *output, const void *input)
|
|||||||
nist5_ctx_holder ctx;
|
nist5_ctx_holder ctx;
|
||||||
memcpy( &ctx, &nist5_ctx, sizeof(nist5_ctx) );
|
memcpy( &ctx, &nist5_ctx, sizeof(nist5_ctx) );
|
||||||
|
|
||||||
|
|
||||||
DECL_BLK;
|
DECL_BLK;
|
||||||
BLK_I;
|
BLK_I;
|
||||||
BLK_W;
|
BLK_W;
|
||||||
BLK_C;
|
BLK_C;
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
GRS_I;
|
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||||
GRS_U;
|
sph_groestl512_close(&ctx.groestl, hash);
|
||||||
GRS_C;
|
|
||||||
#else
|
#else
|
||||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||||
final_groestl( &ctx.groestl, (char*)hash);
|
final_groestl( &ctx.groestl, (char*)hash);
|
||||||
|
@@ -19,10 +19,7 @@
|
|||||||
#include "algo/skein/sse2/skein.c"
|
#include "algo/skein/sse2/skein.c"
|
||||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
#include "algo/groestl/sse2/grso.h"
|
|
||||||
#include "algo/groestl/sse2/grso-macro.c"
|
|
||||||
#else
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -36,37 +33,36 @@
|
|||||||
#define DATA_ALIGNXY(x,y) __declspec(align(y)) x
|
#define DATA_ALIGNXY(x,y) __declspec(align(y)) x
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
hashState_groestl quark_groestl_ctx;
|
sph_groestl512_context quark_ctx;
|
||||||
|
#else
|
||||||
|
hashState_groestl quark_ctx;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void init_quark_ctx()
|
void init_quark_ctx()
|
||||||
{
|
{
|
||||||
#ifndef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
init_groestl( &quark_groestl_ctx );
|
sph_groestl512_init( &quark_ctx );
|
||||||
|
#else
|
||||||
|
init_groestl( &quark_ctx );
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static void quarkhash(void *state, const void *input)
|
inline static void quarkhash(void *state, const void *input)
|
||||||
{
|
{
|
||||||
#ifdef NO_AES_NI
|
|
||||||
grsoState sts_grs;
|
|
||||||
#else
|
|
||||||
hashState_groestl ctx;
|
|
||||||
memcpy(&ctx, &quark_groestl_ctx, sizeof(quark_groestl_ctx));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* shared temp space */
|
|
||||||
/* hash is really just 64bytes but it used to hold both hash and final round constants passed 64 */
|
|
||||||
|
|
||||||
unsigned char hashbuf[128];
|
unsigned char hashbuf[128];
|
||||||
size_t hashptr;
|
size_t hashptr;
|
||||||
sph_u64 hashctA;
|
sph_u64 hashctA;
|
||||||
sph_u64 hashctB;
|
sph_u64 hashctB;
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
unsigned char hash[128];
|
unsigned char hash[128];
|
||||||
|
#ifdef NO_AES_NI
|
||||||
|
sph_groestl512_context ctx;
|
||||||
|
#else
|
||||||
|
hashState_groestl ctx;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
memcpy( &ctx, &quark_ctx, sizeof(ctx) );
|
||||||
|
|
||||||
// Blake
|
// Blake
|
||||||
DECL_BLK;
|
DECL_BLK;
|
||||||
@@ -117,13 +113,13 @@ inline static void quarkhash(void *state, const void *input)
|
|||||||
{
|
{
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
GRS_I;
|
sph_groestl512_init( &ctx );
|
||||||
GRS_U;
|
sph_groestl512 ( &ctx, hash, 64 );
|
||||||
GRS_C;
|
sph_groestl512_close( &ctx, hash );
|
||||||
#else
|
#else
|
||||||
reinit_groestl( &ctx );
|
reinit_groestl( &ctx );
|
||||||
update_groestl(&ctx, (char*)hash,512);
|
update_groestl( &ctx, (char*)hash, 512 );
|
||||||
final_groestl(&ctx, (char*)hash);
|
final_groestl( &ctx, (char*)hash );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
} while(0); continue;
|
} while(0); continue;
|
||||||
|
@@ -371,7 +371,6 @@ extern "C"{
|
|||||||
|
|
||||||
#define DECL_SKN \
|
#define DECL_SKN \
|
||||||
sph_u64 sknh0, sknh1, sknh2, sknh3, sknh4, sknh5, sknh6, sknh7; \
|
sph_u64 sknh0, sknh1, sknh2, sknh3, sknh4, sknh5, sknh6, sknh7; \
|
||||||
unsigned char sknbuf[64]; \
|
|
||||||
|
|
||||||
#define sknREAD_STATE_BIG(sc) do { \
|
#define sknREAD_STATE_BIG(sc) do { \
|
||||||
sknh0 = (sc)->sknh0; \
|
sknh0 = (sc)->sknh0; \
|
||||||
@@ -424,7 +423,6 @@ do { \
|
|||||||
do { \
|
do { \
|
||||||
unsigned char *buf; \
|
unsigned char *buf; \
|
||||||
size_t ptr; \
|
size_t ptr; \
|
||||||
unsigned first; \
|
|
||||||
size_t len = 64; \
|
size_t len = 64; \
|
||||||
const void *data = hash; \
|
const void *data = hash; \
|
||||||
buf = hashbuf; \
|
buf = hashbuf; \
|
||||||
@@ -441,7 +439,6 @@ do { \
|
|||||||
unsigned char *buf; \
|
unsigned char *buf; \
|
||||||
size_t ptr; \
|
size_t ptr; \
|
||||||
unsigned et; \
|
unsigned et; \
|
||||||
int i; \
|
|
||||||
\
|
\
|
||||||
buf = hashbuf; \
|
buf = hashbuf; \
|
||||||
ptr = hashptr; \
|
ptr = hashptr; \
|
||||||
|
@@ -18,10 +18,7 @@
|
|||||||
#include "algo/simd/sph_simd.h"
|
#include "algo/simd/sph_simd.h"
|
||||||
#include "algo/echo/sph_echo.h"
|
#include "algo/echo/sph_echo.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
// #include "algo/echo/sph_echo.h"
|
|
||||||
// #include "algo/groestl/sph_groestl.h"
|
|
||||||
#else
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#endif
|
#endif
|
||||||
|
@@ -17,10 +17,7 @@
|
|||||||
#include "algo/simd/sph_simd.h"
|
#include "algo/simd/sph_simd.h"
|
||||||
#include "algo/echo/sph_echo.h"
|
#include "algo/echo/sph_echo.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
#include "algo/groestl/sse2/grso.h"
|
|
||||||
#include "algo/groestl/sse2/grso-macro.c"
|
|
||||||
#else
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -40,7 +37,7 @@ typedef struct {
|
|||||||
hashState_sd simd;
|
hashState_sd simd;
|
||||||
sph_shavite512_context shavite;
|
sph_shavite512_context shavite;
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
// sph_groestl512_context groestl;
|
sph_groestl512_context groestl;
|
||||||
sph_echo512_context echo;
|
sph_echo512_context echo;
|
||||||
#else
|
#else
|
||||||
hashState_echo echo;
|
hashState_echo echo;
|
||||||
@@ -57,7 +54,7 @@ void init_x11_ctx()
|
|||||||
sph_shavite512_init( &x11_ctx.shavite );
|
sph_shavite512_init( &x11_ctx.shavite );
|
||||||
init_sd( &x11_ctx.simd, 512 );
|
init_sd( &x11_ctx.simd, 512 );
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
// sph_groestl512_init( &x11_ctx.groestl );
|
sph_groestl512_init( &x11_ctx.groestl );
|
||||||
sph_echo512_init( &x11_ctx.echo );
|
sph_echo512_init( &x11_ctx.echo );
|
||||||
#else
|
#else
|
||||||
init_echo( &x11_ctx.echo, 512 );
|
init_echo( &x11_ctx.echo, 512 );
|
||||||
@@ -92,13 +89,8 @@ static void x11_hash( void *state, const void *input )
|
|||||||
#undef dH
|
#undef dH
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
grsoState sts_grs;
|
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||||
GRS_I;
|
sph_groestl512_close(&ctx.groestl, hash);
|
||||||
GRS_U;
|
|
||||||
GRS_C;
|
|
||||||
|
|
||||||
// sph_groestl512 (&ctx.groestl, hash, 64);
|
|
||||||
// sph_groestl512_close(&ctx.groestl, hash);
|
|
||||||
#else
|
#else
|
||||||
update_groestl( &ctx.groestl, (char*)hash, 512 );
|
update_groestl( &ctx.groestl, (char*)hash, 512 );
|
||||||
final_groestl( &ctx.groestl, (char*)hash );
|
final_groestl( &ctx.groestl, (char*)hash );
|
||||||
|
@@ -18,10 +18,7 @@
|
|||||||
#include "algo/simd/sph_simd.h"
|
#include "algo/simd/sph_simd.h"
|
||||||
#include "algo/echo/sph_echo.h"
|
#include "algo/echo/sph_echo.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
// #include "algo/groestl/sse2/grso.h"
|
|
||||||
// #include "algo/groestl/sse2/grso-macro.c"
|
|
||||||
#else
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#endif
|
#endif
|
||||||
|
@@ -6,6 +6,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "algo/groestl/sph_groestl.h"
|
||||||
#include "algo/gost/sph_gost.h"
|
#include "algo/gost/sph_gost.h"
|
||||||
#include "algo/shavite/sph_shavite.h"
|
#include "algo/shavite/sph_shavite.h"
|
||||||
#include "algo/echo/sph_echo.h"
|
#include "algo/echo/sph_echo.h"
|
||||||
@@ -19,10 +20,7 @@
|
|||||||
#include "algo/skein/sse2/skein.c"
|
#include "algo/skein/sse2/skein.c"
|
||||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
#include "algo/groestl/sse2/grso.h"
|
|
||||||
#include "algo/groestl/sse2/grso-macro.c"
|
|
||||||
#else
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -34,6 +32,7 @@ typedef struct {
|
|||||||
cubehashParam cube;
|
cubehashParam cube;
|
||||||
hashState_sd simd;
|
hashState_sd simd;
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
|
sph_groestl512_context groestl;
|
||||||
sph_echo512_context echo;
|
sph_echo512_context echo;
|
||||||
#else
|
#else
|
||||||
hashState_echo echo;
|
hashState_echo echo;
|
||||||
@@ -51,6 +50,7 @@ void init_sib_ctx()
|
|||||||
cubehashInit( &sib_ctx.cube, 512, 16, 32 );
|
cubehashInit( &sib_ctx.cube, 512, 16, 32 );
|
||||||
init_sd( &sib_ctx.simd, 512 );
|
init_sd( &sib_ctx.simd, 512 );
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
|
sph_groestl512_init( &sib_ctx.groestl );
|
||||||
sph_echo512_init( &sib_ctx.echo );
|
sph_echo512_init( &sib_ctx.echo );
|
||||||
#else
|
#else
|
||||||
init_echo( &sib_ctx.echo, 512 );
|
init_echo( &sib_ctx.echo, 512 );
|
||||||
@@ -59,17 +59,12 @@ void init_sib_ctx()
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void sibhash(void *output, const void *input)
|
void sibhash(void *output, const void *input)
|
||||||
{
|
{
|
||||||
unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
|
unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
|
||||||
#define hashA hash
|
#define hashA hash
|
||||||
#define hashB hash+64
|
#define hashB hash+64
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
|
||||||
grsoState sts_grs;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
size_t hashptr;
|
size_t hashptr;
|
||||||
unsigned char hashbuf[128];
|
unsigned char hashbuf[128];
|
||||||
sph_u64 hashctA;
|
sph_u64 hashctA;
|
||||||
@@ -95,12 +90,11 @@ void sibhash(void *output, const void *input)
|
|||||||
#undef dH
|
#undef dH
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
GRS_I;
|
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||||
GRS_U;
|
sph_groestl512_close(&ctx.groestl, hash);
|
||||||
GRS_C;
|
|
||||||
#else
|
#else
|
||||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||||
final_groestl( &ctx.groestl, (char*)hash);
|
final_groestl( &ctx.groestl, (char*)hash);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
DECL_SKN;
|
DECL_SKN;
|
||||||
|
@@ -29,10 +29,7 @@
|
|||||||
#include "algo/skein/sse2/skein.c"
|
#include "algo/skein/sse2/skein.c"
|
||||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
#include "algo/groestl/sse2/grso.h"
|
|
||||||
#include "algo/groestl/sse2/grso-macro.c"
|
|
||||||
#else
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -79,9 +76,6 @@ static void x13hash(void *output, const void *input)
|
|||||||
|
|
||||||
x13_ctx_holder ctx;
|
x13_ctx_holder ctx;
|
||||||
memcpy( &ctx, &x13_ctx, sizeof(x13_ctx) );
|
memcpy( &ctx, &x13_ctx, sizeof(x13_ctx) );
|
||||||
#ifdef NO_AES_NI
|
|
||||||
grsoState sts_grs;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// X11 algos
|
// X11 algos
|
||||||
|
|
||||||
@@ -116,12 +110,8 @@ static void x13hash(void *output, const void *input)
|
|||||||
//---groetl----
|
//---groetl----
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
// use GRS if possible
|
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||||
GRS_I;
|
sph_groestl512_close(&ctx.groestl, hash);
|
||||||
GRS_U;
|
|
||||||
GRS_C;
|
|
||||||
// sph_groestl512 (&ctx.groestl, hash, 64);
|
|
||||||
// sph_groestl512_close(&ctx.groestl, hash);
|
|
||||||
#else
|
#else
|
||||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||||
final_groestl( &ctx.groestl, (char*)hash);
|
final_groestl( &ctx.groestl, (char*)hash);
|
||||||
|
@@ -31,10 +31,7 @@
|
|||||||
#include "algo/skein/sse2/skein.c"
|
#include "algo/skein/sse2/skein.c"
|
||||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
#include "algo/groestl/sse2/grso.h"
|
|
||||||
#include "algo/groestl/sse2/grso-macro.c"
|
|
||||||
#else
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -84,10 +81,6 @@ static void x14hash(void *output, const void *input)
|
|||||||
x14_ctx_holder ctx;
|
x14_ctx_holder ctx;
|
||||||
memcpy(&ctx, &x14_ctx, sizeof(x14_ctx));
|
memcpy(&ctx, &x14_ctx, sizeof(x14_ctx));
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
|
||||||
grsoState sts_grs;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
unsigned char hashbuf[128];
|
unsigned char hashbuf[128];
|
||||||
size_t hashptr;
|
size_t hashptr;
|
||||||
sph_u64 hashctA;
|
sph_u64 hashctA;
|
||||||
@@ -119,12 +112,8 @@ static void x14hash(void *output, const void *input)
|
|||||||
//---groestl----
|
//---groestl----
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
// use SSE2 optimized GRS if possible
|
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||||
GRS_I;
|
sph_groestl512_close(&ctx.groestl, hash);
|
||||||
GRS_U;
|
|
||||||
GRS_C;
|
|
||||||
// sph_groestl512 (&ctx.groestl, hash, 64);
|
|
||||||
// sph_groestl512_close(&ctx.groestl, hash);
|
|
||||||
#else
|
#else
|
||||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||||
final_groestl( &ctx.groestl, (char*)hash);
|
final_groestl( &ctx.groestl, (char*)hash);
|
||||||
|
@@ -31,10 +31,7 @@
|
|||||||
#include "algo/skein/sse2/skein.c"
|
#include "algo/skein/sse2/skein.c"
|
||||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
#include "algo/groestl/sse2/grso.h"
|
|
||||||
#include "algo/groestl/sse2/grso-macro.c"
|
|
||||||
#else
|
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -86,10 +83,6 @@ static void x15hash(void *output, const void *input)
|
|||||||
x15_ctx_holder ctx;
|
x15_ctx_holder ctx;
|
||||||
memcpy( &ctx, &x15_ctx, sizeof(x15_ctx) );
|
memcpy( &ctx, &x15_ctx, sizeof(x15_ctx) );
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
|
||||||
grsoState sts_grs;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
unsigned char hashbuf[128];
|
unsigned char hashbuf[128];
|
||||||
size_t hashptr;
|
size_t hashptr;
|
||||||
sph_u64 hashctA;
|
sph_u64 hashctA;
|
||||||
@@ -120,14 +113,11 @@ static void x15hash(void *output, const void *input)
|
|||||||
//---groestl----
|
//---groestl----
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
GRS_I;
|
sph_groestl512(&ctx.groestl, hash, 64);
|
||||||
GRS_U;
|
sph_groestl512_close(&ctx.groestl, hash);
|
||||||
GRS_C;
|
|
||||||
// sph_groestl512(&ctx.groestl, hash, 64);
|
|
||||||
// sph_groestl512_close(&ctx.groestl, hash);
|
|
||||||
#else
|
#else
|
||||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||||
final_groestl( &ctx.groestl, (char*)hash);
|
final_groestl( &ctx.groestl, (char*)hash);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//---skein4---
|
//---skein4---
|
||||||
|
@@ -33,10 +33,7 @@
|
|||||||
#include "algo/skein/sse2/skein.c"
|
#include "algo/skein/sse2/skein.c"
|
||||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
#include "algo/groestl/sse2/grso.h"
|
|
||||||
#include "algo/groestl/sse2/grso-macro.c"
|
|
||||||
#else
|
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -92,10 +89,6 @@ static void x17hash(void *output, const void *input)
|
|||||||
x17_ctx_holder ctx;
|
x17_ctx_holder ctx;
|
||||||
memcpy( &ctx, &x17_ctx, sizeof(x17_ctx) );
|
memcpy( &ctx, &x17_ctx, sizeof(x17_ctx) );
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
|
||||||
grsoState sts_grs;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
unsigned char hashbuf[128];
|
unsigned char hashbuf[128];
|
||||||
size_t hashptr;
|
size_t hashptr;
|
||||||
sph_u64 hashctA;
|
sph_u64 hashctA;
|
||||||
@@ -126,14 +119,11 @@ static void x17hash(void *output, const void *input)
|
|||||||
//---groestl----
|
//---groestl----
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
// GRS_I;
|
|
||||||
// GRS_U;
|
|
||||||
// GRS_C;
|
|
||||||
sph_groestl512(&ctx.groestl, hash, 64);
|
sph_groestl512(&ctx.groestl, hash, 64);
|
||||||
sph_groestl512_close(&ctx.groestl, hash);
|
sph_groestl512_close(&ctx.groestl, hash);
|
||||||
#else
|
#else
|
||||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||||
final_groestl( &ctx.groestl, (char*)hash);
|
final_groestl( &ctx.groestl, (char*)hash);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//---skein4---
|
//---skein4---
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,821 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2011-2012 pooler@litecoinpool.org
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
||||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
||||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "cpuminer-config.h"
|
|
||||||
|
|
||||||
#if defined(__linux__) && defined(__ELF__)
|
|
||||||
.section .note.GNU-stack,"",%progbits
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__i386__)
|
|
||||||
|
|
||||||
/*
 * scrypt_shuffle src, so, dest, do
 *
 * Copy the sixteen 32-bit words at \so(\src) .. \so+60(\src) to
 * \do(\dest) .. \do+60(\dest), permuting their order on the way.
 * The words at byte offsets 0, 24, 32 and 56 keep their position; the
 * other twelve are exchanged in pairs (12<->60, 28<->44, 8<->40,
 * 16<->48, 4<->20, 36<->52).  The permutation is its own inverse, so
 * the same macro is used to shuffle data in and back out again.
 *
 * Clobbers %eax, %ebx, %ecx and %edx.
 */
.macro scrypt_shuffle src, so, dest, do
	/* offsets 12, 28, 44, 60: order reversed */
	movl \so+60(\src), %eax
	movl \so+44(\src), %ebx
	movl \so+28(\src), %ecx
	movl \so+12(\src), %edx
	movl %eax, \do+12(\dest)
	movl %ebx, \do+28(\dest)
	movl %ecx, \do+44(\dest)
	movl %edx, \do+60(\dest)
	/* offsets 8<->40 and 16<->48 */
	movl \so+40(\src), %eax
	movl \so+8(\src), %ebx
	movl \so+48(\src), %ecx
	movl \so+16(\src), %edx
	movl %eax, \do+8(\dest)
	movl %ebx, \do+40(\dest)
	movl %ecx, \do+16(\dest)
	movl %edx, \do+48(\dest)
	/* offsets 4<->20 and 36<->52 */
	movl \so+20(\src), %eax
	movl \so+4(\src), %ebx
	movl \so+52(\src), %ecx
	movl \so+36(\src), %edx
	movl %eax, \do+4(\dest)
	movl %ebx, \do+20(\dest)
	movl %ecx, \do+36(\dest)
	movl %edx, \do+52(\dest)
	/* offsets 0, 24, 32, 56: copied unchanged */
	movl \so+0(\src), %eax
	movl \so+24(\src), %ebx
	movl \so+32(\src), %ecx
	movl \so+56(\src), %edx
	movl %eax, \do+0(\dest)
	movl %ebx, \do+24(\dest)
	movl %ecx, \do+32(\dest)
	movl %edx, \do+56(\dest)
.endm
|
|
||||||
|
|
||||||
/*
 * salsa8_core_gen_quadround
 *
 * Straight-line integer code that updates, in place, the sixteen 32-bit
 * words stored at 4(%esp) .. 64(%esp).  Every step is an addition
 * (addl/leal), a left-rotate by 7, 9, 13 or 18 bits and an xor -- the
 * rotation amounts of the Salsa20 quarter-round.  Going by its name, one
 * expansion is presumably four Salsa20 rounds (TODO confirm against the
 * Salsa20 specification); salsa8_core_gen expands it twice.
 *
 * Stack slots are re-used for intermediate values and the registers are
 * re-assigned continuously, so the instruction order must not be changed.
 *
 * Uses %ebx, %ecx, %edx, %esi, %edi and %ebp as scratch; %eax is not
 * touched.
 */
.macro salsa8_core_gen_quadround
	movl 52(%esp), %ecx
	movl 4(%esp), %edx
	movl 20(%esp), %ebx
	movl 8(%esp), %esi
	leal (%ecx, %edx), %edi
	roll $7, %edi
	xorl %edi, %ebx
	movl %ebx, 4(%esp)
	movl 36(%esp), %edi
	leal (%edx, %ebx), %ebp
	roll $9, %ebp
	xorl %ebp, %edi
	movl 24(%esp), %ebp
	movl %edi, 8(%esp)
	addl %edi, %ebx
	roll $13, %ebx
	xorl %ebx, %ecx
	movl 40(%esp), %ebx
	movl %ecx, 20(%esp)
	addl %edi, %ecx
	roll $18, %ecx
	leal (%esi, %ebp), %edi
	roll $7, %edi
	xorl %edi, %ebx
	movl %ebx, 24(%esp)
	movl 56(%esp), %edi
	xorl %ecx, %edx
	leal (%ebp, %ebx), %ecx
	roll $9, %ecx
	xorl %ecx, %edi
	movl %edi, 36(%esp)
	movl 28(%esp), %ecx
	movl %edx, 28(%esp)
	movl 44(%esp), %edx
	addl %edi, %ebx
	roll $13, %ebx
	xorl %ebx, %esi
	movl 60(%esp), %ebx
	movl %esi, 40(%esp)
	addl %edi, %esi
	roll $18, %esi
	leal (%ecx, %edx), %edi
	roll $7, %edi
	xorl %edi, %ebx
	movl %ebx, 44(%esp)
	movl 12(%esp), %edi
	xorl %esi, %ebp
	leal (%edx, %ebx), %esi
	roll $9, %esi
	xorl %esi, %edi
	movl %edi, 12(%esp)
	movl 48(%esp), %esi
	movl %ebp, 48(%esp)
	movl 64(%esp), %ebp
	addl %edi, %ebx
	roll $13, %ebx
	xorl %ebx, %ecx
	movl 16(%esp), %ebx
	movl %ecx, 16(%esp)
	addl %edi, %ecx
	roll $18, %ecx
	leal (%esi, %ebp), %edi
	roll $7, %edi
	xorl %edi, %ebx
	movl 32(%esp), %edi
	xorl %ecx, %edx
	leal (%ebp, %ebx), %ecx
	roll $9, %ecx
	xorl %ecx, %edi
	movl %edi, 32(%esp)
	movl %ebx, %ecx
	movl %edx, 52(%esp)
	movl 28(%esp), %edx
	addl %edi, %ebx
	roll $13, %ebx
	xorl %ebx, %esi
	movl 40(%esp), %ebx
	movl %esi, 28(%esp)
	addl %edi, %esi
	roll $18, %esi
	leal (%ecx, %edx), %edi
	roll $7, %edi
	xorl %edi, %ebx
	movl %ebx, 40(%esp)
	movl 12(%esp), %edi
	xorl %esi, %ebp
	leal (%edx, %ebx), %esi
	roll $9, %esi
	xorl %esi, %edi
	movl %edi, 12(%esp)
	movl 4(%esp), %esi
	movl %ebp, 4(%esp)
	movl 48(%esp), %ebp
	addl %edi, %ebx
	roll $13, %ebx
	xorl %ebx, %ecx
	movl 16(%esp), %ebx
	movl %ecx, 16(%esp)
	addl %edi, %ecx
	roll $18, %ecx
	leal (%esi, %ebp), %edi
	roll $7, %edi
	xorl %edi, %ebx
	movl %ebx, 48(%esp)
	movl 32(%esp), %edi
	xorl %ecx, %edx
	leal (%ebp, %ebx), %ecx
	roll $9, %ecx
	xorl %ecx, %edi
	movl %edi, 32(%esp)
	movl 24(%esp), %ecx
	movl %edx, 24(%esp)
	movl 52(%esp), %edx
	addl %edi, %ebx
	roll $13, %ebx
	xorl %ebx, %esi
	movl 28(%esp), %ebx
	movl %esi, 28(%esp)
	addl %edi, %esi
	roll $18, %esi
	leal (%ecx, %edx), %edi
	roll $7, %edi
	xorl %edi, %ebx
	movl %ebx, 52(%esp)
	movl 8(%esp), %edi
	xorl %esi, %ebp
	leal (%edx, %ebx), %esi
	roll $9, %esi
	xorl %esi, %edi
	movl %edi, 8(%esp)
	movl 44(%esp), %esi
	movl %ebp, 44(%esp)
	movl 4(%esp), %ebp
	addl %edi, %ebx
	roll $13, %ebx
	xorl %ebx, %ecx
	movl 20(%esp), %ebx
	movl %ecx, 4(%esp)
	addl %edi, %ecx
	roll $18, %ecx
	leal (%esi, %ebp), %edi
	roll $7, %edi
	xorl %edi, %ebx
	movl 36(%esp), %edi
	xorl %ecx, %edx
	leal (%ebp, %ebx), %ecx
	roll $9, %ecx
	xorl %ecx, %edi
	movl %edi, 20(%esp)
	movl %ebx, %ecx
	movl %edx, 36(%esp)
	movl 24(%esp), %edx
	addl %edi, %ebx
	roll $13, %ebx
	xorl %ebx, %esi
	movl 28(%esp), %ebx
	movl %esi, 24(%esp)
	addl %edi, %esi
	roll $18, %esi
	leal (%ecx, %edx), %edi
	roll $7, %edi
	xorl %edi, %ebx
	movl %ebx, 28(%esp)
	xorl %esi, %ebp
	movl 8(%esp), %esi
	leal (%edx, %ebx), %edi
	roll $9, %edi
	xorl %edi, %esi
	movl 40(%esp), %edi
	movl %ebp, 8(%esp)
	movl 44(%esp), %ebp
	movl %esi, 40(%esp)
	addl %esi, %ebx
	roll $13, %ebx
	xorl %ebx, %ecx
	movl 4(%esp), %ebx
	movl %ecx, 44(%esp)
	addl %esi, %ecx
	roll $18, %ecx
	leal (%edi, %ebp), %esi
	roll $7, %esi
	xorl %esi, %ebx
	movl %ebx, 4(%esp)
	movl 20(%esp), %esi
	xorl %ecx, %edx
	leal (%ebp, %ebx), %ecx
	roll $9, %ecx
	xorl %ecx, %esi
	movl %esi, 56(%esp)
	movl 48(%esp), %ecx
	movl %edx, 20(%esp)
	movl 36(%esp), %edx
	addl %esi, %ebx
	roll $13, %ebx
	xorl %ebx, %edi
	movl 24(%esp), %ebx
	movl %edi, 24(%esp)
	addl %esi, %edi
	roll $18, %edi
	leal (%ecx, %edx), %esi
	roll $7, %esi
	xorl %esi, %ebx
	movl %ebx, 60(%esp)
	movl 12(%esp), %esi
	xorl %edi, %ebp
	leal (%edx, %ebx), %edi
	roll $9, %edi
	xorl %edi, %esi
	movl %esi, 12(%esp)
	movl 52(%esp), %edi
	movl %ebp, 36(%esp)
	movl 8(%esp), %ebp
	addl %esi, %ebx
	roll $13, %ebx
	xorl %ebx, %ecx
	movl 16(%esp), %ebx
	movl %ecx, 16(%esp)
	addl %esi, %ecx
	roll $18, %ecx
	leal (%edi, %ebp), %esi
	roll $7, %esi
	xorl %esi, %ebx
	movl 32(%esp), %esi
	xorl %ecx, %edx
	leal (%ebp, %ebx), %ecx
	roll $9, %ecx
	xorl %ecx, %esi
	movl %esi, 32(%esp)
	movl %ebx, %ecx
	movl %edx, 48(%esp)
	movl 20(%esp), %edx
	addl %esi, %ebx
	roll $13, %ebx
	xorl %ebx, %edi
	movl 24(%esp), %ebx
	movl %edi, 20(%esp)
	addl %esi, %edi
	roll $18, %edi
	leal (%ecx, %edx), %esi
	roll $7, %esi
	xorl %esi, %ebx
	movl %ebx, 8(%esp)
	movl 12(%esp), %esi
	xorl %edi, %ebp
	leal (%edx, %ebx), %edi
	roll $9, %edi
	xorl %edi, %esi
	movl %esi, 12(%esp)
	movl 28(%esp), %edi
	movl %ebp, 52(%esp)
	movl 36(%esp), %ebp
	addl %esi, %ebx
	roll $13, %ebx
	xorl %ebx, %ecx
	movl 16(%esp), %ebx
	movl %ecx, 16(%esp)
	addl %esi, %ecx
	roll $18, %ecx
	leal (%edi, %ebp), %esi
	roll $7, %esi
	xorl %esi, %ebx
	movl %ebx, 28(%esp)
	movl 32(%esp), %esi
	xorl %ecx, %edx
	leal (%ebp, %ebx), %ecx
	roll $9, %ecx
	xorl %ecx, %esi
	movl %esi, 32(%esp)
	movl 4(%esp), %ecx
	movl %edx, 4(%esp)
	movl 48(%esp), %edx
	addl %esi, %ebx
	roll $13, %ebx
	xorl %ebx, %edi
	movl 20(%esp), %ebx
	movl %edi, 20(%esp)
	addl %esi, %edi
	roll $18, %edi
	leal (%ecx, %edx), %esi
	roll $7, %esi
	xorl %esi, %ebx
	movl %ebx, 48(%esp)
	movl 40(%esp), %esi
	xorl %edi, %ebp
	leal (%edx, %ebx), %edi
	roll $9, %edi
	xorl %edi, %esi
	movl %esi, 36(%esp)
	movl 60(%esp), %edi
	movl %ebp, 24(%esp)
	movl 52(%esp), %ebp
	addl %esi, %ebx
	roll $13, %ebx
	xorl %ebx, %ecx
	movl 44(%esp), %ebx
	movl %ecx, 40(%esp)
	addl %esi, %ecx
	roll $18, %ecx
	leal (%edi, %ebp), %esi
	roll $7, %esi
	xorl %esi, %ebx
	movl %ebx, 52(%esp)
	movl 56(%esp), %esi
	xorl %ecx, %edx
	leal (%ebp, %ebx), %ecx
	roll $9, %ecx
	xorl %ecx, %esi
	movl %esi, 56(%esp)
	addl %esi, %ebx
	movl %edx, 44(%esp)
	roll $13, %ebx
	xorl %ebx, %edi
	movl %edi, 60(%esp)
	addl %esi, %edi
	roll $18, %edi
	xorl %edi, %ebp
	movl %ebp, 64(%esp)
.endm
|
|
||||||
|
|
||||||
/*
 * salsa8_core_gen -- file-local routine (no .globl), reached with `call`.
 *
 * Expands salsa8_core_gen_quadround twice over the sixteen words that
 * sit directly above the return address, i.e. at 4(%esp) .. 64(%esp)
 * inside this routine, which is 0(%esp) .. 60(%esp) in the caller's
 * frame.  The words are updated in place.
 *
 * Clobbers %ebx, %ecx, %edx, %esi, %edi and %ebp, so callers must reload
 * any pointer they kept in those registers.
 */
	.text
	.p2align 5
salsa8_core_gen:
	salsa8_core_gen_quadround
	salsa8_core_gen_quadround
	ret
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * scrypt_core / _scrypt_core -- exported entry point (both spellings are
 * declared .globl, presumably to cover platforms that prefix C symbols
 * with an underscore).
 *
 * Takes two pointer arguments on the stack.  After the four register
 * pushes below they are found at 20(%esp) and 24(%esp):
 *   1st argument -> %edi: a 128-byte block (offsets 0..124 are accessed),
 *                         updated in place;
 *   2nd argument -> %esi: a scratch area of 131072 bytes, i.e. 1024
 *                         entries of 128 bytes.
 *
 * CPUID leaf 1 is queried and, if EDX bit 26 (mask 0x04000000) is set,
 * control transfers to scrypt_core_sse2; otherwise execution falls
 * through into the integer-only scrypt_core_gen below.
 *
 * scrypt_core_gen frame after `subl $72, %esp`:
 *    0..60(%esp)  sixteen working words handed to salsa8_core_gen
 *   64(%esp)      saved scratch-area cursor (loop 1)
 *   68(%esp)      saved loop bound / loop counter
 *   92(%esp)      1st argument, 96(%esp) 2nd argument
 * %edi and %esi are reloaded from 92/96(%esp) after every call because
 * salsa8_core_gen clobbers them.
 */
	.text
	.p2align 5
	.globl scrypt_core
	.globl _scrypt_core
scrypt_core:
_scrypt_core:
	pushl %ebx
	pushl %ebp
	pushl %edi
	pushl %esi

	/* Check for SSE2 availability */
	movl $1, %eax
	cpuid
	andl $0x04000000, %edx
	jnz scrypt_core_sse2

scrypt_core_gen:
	movl 20(%esp), %edi
	movl 24(%esp), %esi
	subl $72, %esp

/*
 * macro1a p, q: copy block words p and q to the current scratch entry
 * (%esi), then store block[p] ^ block[q] to block[p] and to the working
 * word p(%esp).  Clobbers %eax, %edx.
 */
.macro scrypt_core_macro1a p, q
	movl \p(%edi), %eax
	movl \q(%edi), %edx
	movl %eax, \p(%esi)
	movl %edx, \q(%esi)
	xorl %edx, %eax
	movl %eax, \p(%edi)
	movl %eax, \p(%esp)
.endm

/*
 * macro1b p, q: xor block words p and q with the scratch entry selected
 * by %edx (byte offset from %esi); write the new block[q] back, then
 * store block[p] ^ block[q] to block[p] and to p(%esp).
 * Clobbers %eax, %ebx.
 */
.macro scrypt_core_macro1b p, q
	movl \p(%edi), %eax
	xorl \p(%esi, %edx), %eax
	movl \q(%edi), %ebx
	xorl \q(%esi, %edx), %ebx
	movl %ebx, \q(%edi)
	xorl %ebx, %eax
	movl %eax, \p(%edi)
	movl %eax, \p(%esp)
.endm

/*
 * macro2 p, q: block[p] += working word p; then block[q] ^= block[p],
 * and that xor result also becomes the new working word p(%esp).
 * Clobbers %eax.
 */
.macro scrypt_core_macro2 p, q
	movl \p(%esp), %eax
	addl \p(%edi), %eax
	movl %eax, \p(%edi)
	xorl \q(%edi), %eax
	movl %eax, \q(%edi)
	movl %eax, \p(%esp)
.endm

/* macro3 p, q: block[q] += working word p.  Clobbers %eax. */
.macro scrypt_core_macro3 p, q
	movl \p(%esp), %eax
	addl \q(%edi), %eax
	movl %eax, \q(%edi)
.endm

	/* Loop 1: fill the scratch area, one 128-byte entry per pass. */
	leal 131072(%esi), %ecx
scrypt_core_gen_loop1:
	movl %esi, 64(%esp)
	movl %ecx, 68(%esp)

	scrypt_core_macro1a 0, 64
	scrypt_core_macro1a 4, 68
	scrypt_core_macro1a 8, 72
	scrypt_core_macro1a 12, 76
	scrypt_core_macro1a 16, 80
	scrypt_core_macro1a 20, 84
	scrypt_core_macro1a 24, 88
	scrypt_core_macro1a 28, 92
	scrypt_core_macro1a 32, 96
	scrypt_core_macro1a 36, 100
	scrypt_core_macro1a 40, 104
	scrypt_core_macro1a 44, 108
	scrypt_core_macro1a 48, 112
	scrypt_core_macro1a 52, 116
	scrypt_core_macro1a 56, 120
	scrypt_core_macro1a 60, 124

	call salsa8_core_gen

	movl 92(%esp), %edi
	scrypt_core_macro2 0, 64
	scrypt_core_macro2 4, 68
	scrypt_core_macro2 8, 72
	scrypt_core_macro2 12, 76
	scrypt_core_macro2 16, 80
	scrypt_core_macro2 20, 84
	scrypt_core_macro2 24, 88
	scrypt_core_macro2 28, 92
	scrypt_core_macro2 32, 96
	scrypt_core_macro2 36, 100
	scrypt_core_macro2 40, 104
	scrypt_core_macro2 44, 108
	scrypt_core_macro2 48, 112
	scrypt_core_macro2 52, 116
	scrypt_core_macro2 56, 120
	scrypt_core_macro2 60, 124

	call salsa8_core_gen

	movl 92(%esp), %edi
	scrypt_core_macro3 0, 64
	scrypt_core_macro3 4, 68
	scrypt_core_macro3 8, 72
	scrypt_core_macro3 12, 76
	scrypt_core_macro3 16, 80
	scrypt_core_macro3 20, 84
	scrypt_core_macro3 24, 88
	scrypt_core_macro3 28, 92
	scrypt_core_macro3 32, 96
	scrypt_core_macro3 36, 100
	scrypt_core_macro3 40, 104
	scrypt_core_macro3 44, 108
	scrypt_core_macro3 48, 112
	scrypt_core_macro3 52, 116
	scrypt_core_macro3 56, 120
	scrypt_core_macro3 60, 124

	/* advance to the next scratch entry until the end is reached */
	movl 64(%esp), %esi
	movl 68(%esp), %ecx
	addl $128, %esi
	cmpl %ecx, %esi
	jne scrypt_core_gen_loop1

	/* Loop 2: 1024 passes mixing in data-dependent scratch entries. */
	movl 96(%esp), %esi
	movl $1024, %ecx
scrypt_core_gen_loop2:
	movl %ecx, 68(%esp)

	/* entry index = block word at offset 64, mod 1024; times 128 bytes */
	movl 64(%edi), %edx
	andl $1023, %edx
	shll $7, %edx

	scrypt_core_macro1b 0, 64
	scrypt_core_macro1b 4, 68
	scrypt_core_macro1b 8, 72
	scrypt_core_macro1b 12, 76
	scrypt_core_macro1b 16, 80
	scrypt_core_macro1b 20, 84
	scrypt_core_macro1b 24, 88
	scrypt_core_macro1b 28, 92
	scrypt_core_macro1b 32, 96
	scrypt_core_macro1b 36, 100
	scrypt_core_macro1b 40, 104
	scrypt_core_macro1b 44, 108
	scrypt_core_macro1b 48, 112
	scrypt_core_macro1b 52, 116
	scrypt_core_macro1b 56, 120
	scrypt_core_macro1b 60, 124

	call salsa8_core_gen

	movl 92(%esp), %edi
	scrypt_core_macro2 0, 64
	scrypt_core_macro2 4, 68
	scrypt_core_macro2 8, 72
	scrypt_core_macro2 12, 76
	scrypt_core_macro2 16, 80
	scrypt_core_macro2 20, 84
	scrypt_core_macro2 24, 88
	scrypt_core_macro2 28, 92
	scrypt_core_macro2 32, 96
	scrypt_core_macro2 36, 100
	scrypt_core_macro2 40, 104
	scrypt_core_macro2 44, 108
	scrypt_core_macro2 48, 112
	scrypt_core_macro2 52, 116
	scrypt_core_macro2 56, 120
	scrypt_core_macro2 60, 124

	call salsa8_core_gen

	movl 92(%esp), %edi
	movl 96(%esp), %esi
	scrypt_core_macro3 0, 64
	scrypt_core_macro3 4, 68
	scrypt_core_macro3 8, 72
	scrypt_core_macro3 12, 76
	scrypt_core_macro3 16, 80
	scrypt_core_macro3 20, 84
	scrypt_core_macro3 24, 88
	scrypt_core_macro3 28, 92
	scrypt_core_macro3 32, 96
	scrypt_core_macro3 36, 100
	scrypt_core_macro3 40, 104
	scrypt_core_macro3 44, 108
	scrypt_core_macro3 48, 112
	scrypt_core_macro3 52, 116
	scrypt_core_macro3 56, 120
	scrypt_core_macro3 60, 124

	/* `ja` after subl: loop again while the counter is still non-zero */
	movl 68(%esp), %ecx
	subl $1, %ecx
	ja scrypt_core_gen_loop2

	/* release the frame and restore the registers pushed on entry */
	addl $72, %esp
	popl %esi
	popl %edi
	popl %ebp
	popl %ebx
	ret
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * salsa8_core_sse2_doubleround
 *
 * Operates on the four 4x32-bit vectors %xmm0..%xmm3, using %xmm4 and
 * %xmm5 as temporaries.  Each group below computes
 *     dst ^= rotl32(a + b, r)
 * per lane; the rotate is built from a left shift by r and a right
 * shift by 32-r, both xor-ed into dst.  The macro consists of two
 * halves of four such steps (r = 7, 9, 13, 18), with pshufd lane
 * rotations in between to re-align the vectors for the following step.
 */
.macro salsa8_core_sse2_doubleround
	/* xmm3 ^= rotl(xmm0 + xmm1, 7) */
	movdqa %xmm1, %xmm4
	paddd %xmm0, %xmm4
	movdqa %xmm4, %xmm5
	pslld $7, %xmm4
	psrld $25, %xmm5
	pxor %xmm4, %xmm3
	movdqa %xmm0, %xmm4
	pxor %xmm5, %xmm3

	/* xmm2 ^= rotl(xmm0 + xmm3, 9); afterwards rotate xmm3's lanes */
	paddd %xmm3, %xmm4
	movdqa %xmm4, %xmm5
	pslld $9, %xmm4
	psrld $23, %xmm5
	pxor %xmm4, %xmm2
	movdqa %xmm3, %xmm4
	pxor %xmm5, %xmm2
	pshufd $0x93, %xmm3, %xmm3

	/* xmm1 ^= rotl(xmm3 + xmm2, 13), using xmm3 as it was before the shuffle */
	paddd %xmm2, %xmm4
	movdqa %xmm4, %xmm5
	pslld $13, %xmm4
	psrld $19, %xmm5
	pxor %xmm4, %xmm1
	movdqa %xmm2, %xmm4
	pxor %xmm5, %xmm1
	pshufd $0x4e, %xmm2, %xmm2

	/* xmm0 ^= rotl(xmm2 + xmm1, 18), using xmm2 as it was before the shuffle */
	paddd %xmm1, %xmm4
	movdqa %xmm4, %xmm5
	pslld $18, %xmm4
	psrld $14, %xmm5
	pxor %xmm4, %xmm0
	movdqa %xmm3, %xmm4
	pxor %xmm5, %xmm0
	pshufd $0x39, %xmm1, %xmm1

	/* second half: xmm1 ^= rotl(xmm3 + xmm0, 7) */
	paddd %xmm0, %xmm4
	movdqa %xmm4, %xmm5
	pslld $7, %xmm4
	psrld $25, %xmm5
	pxor %xmm4, %xmm1
	movdqa %xmm0, %xmm4
	pxor %xmm5, %xmm1

	/* xmm2 ^= rotl(xmm0 + xmm1, 9); afterwards rotate xmm1's lanes */
	paddd %xmm1, %xmm4
	movdqa %xmm4, %xmm5
	pslld $9, %xmm4
	psrld $23, %xmm5
	pxor %xmm4, %xmm2
	movdqa %xmm1, %xmm4
	pxor %xmm5, %xmm2
	pshufd $0x93, %xmm1, %xmm1

	/* xmm3 ^= rotl(xmm1 + xmm2, 13), using xmm1 as it was before the shuffle */
	paddd %xmm2, %xmm4
	movdqa %xmm4, %xmm5
	pslld $13, %xmm4
	psrld $19, %xmm5
	pxor %xmm4, %xmm3
	movdqa %xmm2, %xmm4
	pxor %xmm5, %xmm3
	pshufd $0x4e, %xmm2, %xmm2

	/* xmm0 ^= rotl(xmm2 + xmm3, 18), using xmm2 as it was before the shuffle */
	paddd %xmm3, %xmm4
	movdqa %xmm4, %xmm5
	pslld $18, %xmm4
	psrld $14, %xmm5
	pxor %xmm4, %xmm0
	pshufd $0x39, %xmm3, %xmm3
	pxor %xmm5, %xmm0
.endm
|
|
||||||
|
|
||||||
/*
 * salsa8_core_sse2
 *
 * Four consecutive expansions of salsa8_core_sse2_doubleround, applied
 * in place to %xmm0..%xmm3 (%xmm4 and %xmm5 are clobbered).  Going by
 * the names this is the 8-round Salsa20 core (4 double rounds).
 */
.macro salsa8_core_sse2
	salsa8_core_sse2_doubleround
	salsa8_core_sse2_doubleround
	salsa8_core_sse2_doubleround
	salsa8_core_sse2_doubleround
.endm
|
|
||||||
|
|
||||||
/*
 * scrypt_core_sse2 -- SSE2 path, reached by `jnz` from scrypt_core after
 * that routine has already pushed %ebx, %ebp, %edi and %esi.  It
 * therefore reads the same two arguments at 20(%esp) / 24(%esp) and
 * performs the matching pops before returning to scrypt_core's caller.
 *
 *   %edi = 1st argument: 128-byte block, rewritten on exit
 *   %esi = 2nd argument: 131072-byte scratch area; it is accessed with
 *          movdqa / SSE memory operands, which require 16-byte alignment
 *   %ebp = original %esp, used to drop the aligned frame on exit
 *
 * A 128-byte, 16-byte-aligned working copy of the block lives at
 * 0..127(%esp) in scrypt_shuffle order.  The last 32 bytes of that copy
 * (offsets 96 and 112) are kept in %xmm6 / %xmm7 for the whole run and
 * are written back to the stack only just before the final shuffle.
 */
	.p2align 5
scrypt_core_sse2:
	movl 20(%esp), %edi
	movl 24(%esp), %esi
	movl %esp, %ebp
	subl $128, %esp
	andl $-16, %esp

	/* working copy of the block, shuffled for the vector code */
	scrypt_shuffle %edi, 0, %esp, 0
	scrypt_shuffle %edi, 64, %esp, 64

	movdqa 96(%esp), %xmm6
	movdqa 112(%esp), %xmm7

	/* Loop 1: %edx walks the scratch area in 128-byte steps up to %ecx. */
	movl %esi, %edx
	leal 131072(%esi), %ecx
scrypt_core_sse2_loop1:
	movdqa 0(%esp), %xmm0
	movdqa 16(%esp), %xmm1
	movdqa 32(%esp), %xmm2
	movdqa 48(%esp), %xmm3
	movdqa 64(%esp), %xmm4
	movdqa 80(%esp), %xmm5
	/* scratch entry: (first half ^ second half) followed by second half */
	pxor %xmm4, %xmm0
	pxor %xmm5, %xmm1
	movdqa %xmm0, 0(%edx)
	movdqa %xmm1, 16(%edx)
	pxor %xmm6, %xmm2
	pxor %xmm7, %xmm3
	movdqa %xmm2, 32(%edx)
	movdqa %xmm3, 48(%edx)
	movdqa %xmm4, 64(%edx)
	movdqa %xmm5, 80(%edx)
	movdqa %xmm6, 96(%edx)
	movdqa %xmm7, 112(%edx)

	/* core output plus its input (just stored at 0..48(%edx)) -> new first half */
	salsa8_core_sse2
	paddd 0(%edx), %xmm0
	paddd 16(%edx), %xmm1
	paddd 32(%edx), %xmm2
	paddd 48(%edx), %xmm3
	movdqa %xmm0, 0(%esp)
	movdqa %xmm1, 16(%esp)
	movdqa %xmm2, 32(%esp)
	movdqa %xmm3, 48(%esp)

	/* second core input = new first half ^ old second half */
	pxor 64(%esp), %xmm0
	pxor 80(%esp), %xmm1
	pxor %xmm6, %xmm2
	pxor %xmm7, %xmm3
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)
	movdqa %xmm2, %xmm6
	movdqa %xmm3, %xmm7
	salsa8_core_sse2
	paddd 64(%esp), %xmm0
	paddd 80(%esp), %xmm1
	paddd %xmm2, %xmm6
	paddd %xmm3, %xmm7
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)

	addl $128, %edx
	cmpl %ecx, %edx
	jne scrypt_core_sse2_loop1

	movdqa 64(%esp), %xmm4
	movdqa 80(%esp), %xmm5

	/* Loop 2: 1024 passes; %xmm4/%xmm5 mirror 64(%esp)/80(%esp). */
	movl $1024, %ecx
scrypt_core_sse2_loop2:
	/* entry offset = (low dword of xmm4 mod 1024) * 128 */
	movd %xmm4, %edx
	movdqa 0(%esp), %xmm0
	movdqa 16(%esp), %xmm1
	movdqa 32(%esp), %xmm2
	movdqa 48(%esp), %xmm3
	andl $1023, %edx
	shll $7, %edx
	pxor 0(%esi, %edx), %xmm0
	pxor 16(%esi, %edx), %xmm1
	pxor 32(%esi, %edx), %xmm2
	pxor 48(%esi, %edx), %xmm3

	pxor %xmm4, %xmm0
	pxor %xmm5, %xmm1
	movdqa %xmm0, 0(%esp)
	movdqa %xmm1, 16(%esp)
	pxor %xmm6, %xmm2
	pxor %xmm7, %xmm3
	movdqa %xmm2, 32(%esp)
	movdqa %xmm3, 48(%esp)
	salsa8_core_sse2
	paddd 0(%esp), %xmm0
	paddd 16(%esp), %xmm1
	paddd 32(%esp), %xmm2
	paddd 48(%esp), %xmm3
	movdqa %xmm0, 0(%esp)
	movdqa %xmm1, 16(%esp)
	movdqa %xmm2, 32(%esp)
	movdqa %xmm3, 48(%esp)

	pxor 64(%esi, %edx), %xmm0
	pxor 80(%esi, %edx), %xmm1
	pxor 96(%esi, %edx), %xmm2
	pxor 112(%esi, %edx), %xmm3
	pxor 64(%esp), %xmm0
	pxor 80(%esp), %xmm1
	pxor %xmm6, %xmm2
	pxor %xmm7, %xmm3
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)
	movdqa %xmm2, %xmm6
	movdqa %xmm3, %xmm7
	salsa8_core_sse2
	paddd 64(%esp), %xmm0
	paddd 80(%esp), %xmm1
	paddd %xmm2, %xmm6
	paddd %xmm3, %xmm7
	movdqa %xmm0, %xmm4
	movdqa %xmm1, %xmm5
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)

	/* `ja` after subl: loop again while the counter is still non-zero */
	subl $1, %ecx
	ja scrypt_core_sse2_loop2

	/* spill the register-resident tail, then un-shuffle into the caller's block */
	movdqa %xmm6, 96(%esp)
	movdqa %xmm7, 112(%esp)

	scrypt_shuffle %esp, 0, %edi, 0
	scrypt_shuffle %esp, 64, %edi, 64

	/* drop the aligned frame and undo scrypt_core's pushes */
	movl %ebp, %esp
	popl %esi
	popl %edi
	popl %ebp
	popl %ebx
	ret
|
|
||||||
|
|
||||||
#endif
|
|
@@ -1,767 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2013 pooler
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
||||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
||||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*
|
|
||||||
* This file was originally written by Colin Percival as part of the Tarsnap
|
|
||||||
* online backup system.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "../cpuminer-config.h"
|
|
||||||
#include "../miner.h"
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
/*
 * Constant tails of the SHA-256 input blocks used by the HMAC/PBKDF2
 * helpers below.  Each starts with the 0x80000000 padding word and ends
 * with a bit count; everything in between is zero.
 *
 *   keypad   : 0x280 = 640 bits  (80 bytes)
 *   innerpad : 0x4a0 = 1184 bits (148 bytes)
 *   outerpad : 0x300 = 768 bits  (96 bytes)
 *   finalblk : leading word 1, then padding, 0x620 = 1568 bits (196 bytes)
 */
static const uint32_t keypad[12] = {
	[0]  = 0x80000000,
	[11] = 0x00000280
};
static const uint32_t innerpad[11] = {
	[0]  = 0x80000000,
	[10] = 0x000004a0
};
static const uint32_t outerpad[8] = {
	[0] = 0x80000000,
	[7] = 0x00000300
};
static const uint32_t finalblk[16] = {
	[0]  = 0x00000001,
	[1]  = 0x80000000,
	[15] = 0x00000620
};
|
|
||||||
|
|
||||||
static inline void HMAC_SHA256_80_init(const uint32_t *key,
|
|
||||||
uint32_t *tstate, uint32_t *ostate)
|
|
||||||
{
|
|
||||||
uint32_t ihash[8];
|
|
||||||
uint32_t pad[16];
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* tstate is assumed to contain the midstate of key */
|
|
||||||
memcpy(pad, key + 16, 16);
|
|
||||||
memcpy(pad + 4, keypad, 48);
|
|
||||||
sha256_transform(tstate, pad, 0);
|
|
||||||
memcpy(ihash, tstate, 32);
|
|
||||||
|
|
||||||
sha256_init(ostate);
|
|
||||||
for (i = 0; i < 8; i++)
|
|
||||||
pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
|
||||||
for (; i < 16; i++)
|
|
||||||
pad[i] = 0x5c5c5c5c;
|
|
||||||
sha256_transform(ostate, pad, 0);
|
|
||||||
|
|
||||||
sha256_init(tstate);
|
|
||||||
for (i = 0; i < 8; i++)
|
|
||||||
pad[i] = ihash[i] ^ 0x36363636;
|
|
||||||
for (; i < 16; i++)
|
|
||||||
pad[i] = 0x36363636;
|
|
||||||
sha256_transform(tstate, pad, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
|
|
||||||
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
|
|
||||||
{
|
|
||||||
uint32_t istate[8], ostate2[8];
|
|
||||||
uint32_t ibuf[16], obuf[16];
|
|
||||||
int i, j;
|
|
||||||
|
|
||||||
memcpy(istate, tstate, 32);
|
|
||||||
sha256_transform(istate, salt, 0);
|
|
||||||
|
|
||||||
memcpy(ibuf, salt + 16, 16);
|
|
||||||
memcpy(ibuf + 5, innerpad, 44);
|
|
||||||
memcpy(obuf + 8, outerpad, 32);
|
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
|
||||||
memcpy(obuf, istate, 32);
|
|
||||||
ibuf[4] = i + 1;
|
|
||||||
sha256_transform(obuf, ibuf, 0);
|
|
||||||
|
|
||||||
memcpy(ostate2, ostate, 32);
|
|
||||||
sha256_transform(ostate2, obuf, 0);
|
|
||||||
for (j = 0; j < 8; j++)
|
|
||||||
output[8 * i + j] = swab32(ostate2[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
|
|
||||||
const uint32_t *salt, uint32_t *output)
|
|
||||||
{
|
|
||||||
uint32_t buf[16];
|
|
||||||
int i;
|
|
||||||
|
|
||||||
sha256_transform(tstate, salt, 1);
|
|
||||||
sha256_transform(tstate, salt + 16, 1);
|
|
||||||
sha256_transform(tstate, finalblk, 0);
|
|
||||||
memcpy(buf, tstate, 32);
|
|
||||||
memcpy(buf + 8, outerpad, 32);
|
|
||||||
|
|
||||||
sha256_transform(ostate, buf, 0);
|
|
||||||
for (i = 0; i < 8; i++)
|
|
||||||
output[i] = swab32(ostate[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_4WAY
|
|
||||||
|
|
||||||
/*
 * Four-lane versions of keypad / innerpad / outerpad / finalblk: every
 * word of the scalar table appears four times in a row, one copy per
 * lane.  Only the non-zero words are spelled out; the rest are zero.
 */
static const uint32_t keypad_4way[4 * 12] = {
	[4 * 0]  = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 11] = 0x00000280, 0x00000280, 0x00000280, 0x00000280
};
static const uint32_t innerpad_4way[4 * 11] = {
	[4 * 0]  = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 10] = 0x000004a0, 0x000004a0, 0x000004a0, 0x000004a0
};
static const uint32_t outerpad_4way[4 * 8] = {
	[4 * 0] = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 7] = 0x00000300, 0x00000300, 0x00000300, 0x00000300
};
static const uint32_t finalblk_4way[4 * 16] __attribute__((aligned(16))) = {
	[4 * 0]  = 0x00000001, 0x00000001, 0x00000001, 0x00000001,
	[4 * 1]  = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 15] = 0x00000620, 0x00000620, 0x00000620, 0x00000620
};
|
|
||||||
|
|
||||||
static inline void HMAC_SHA256_80_init_4way(const uint32_t *key,
|
|
||||||
uint32_t *tstate, uint32_t *ostate)
|
|
||||||
{
|
|
||||||
uint32_t ihash[4 * 8] __attribute__((aligned(16)));
|
|
||||||
uint32_t pad[4 * 16] __attribute__((aligned(16)));
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* tstate is assumed to contain the midstate of key */
|
|
||||||
memcpy(pad, key + 4 * 16, 4 * 16);
|
|
||||||
memcpy(pad + 4 * 4, keypad_4way, 4 * 48);
|
|
||||||
sha256_transform_4way(tstate, pad, 0);
|
|
||||||
memcpy(ihash, tstate, 4 * 32);
|
|
||||||
|
|
||||||
sha256_init_4way(ostate);
|
|
||||||
for (i = 0; i < 4 * 8; i++)
|
|
||||||
pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
|
||||||
for (; i < 4 * 16; i++)
|
|
||||||
pad[i] = 0x5c5c5c5c;
|
|
||||||
sha256_transform_4way(ostate, pad, 0);
|
|
||||||
|
|
||||||
sha256_init_4way(tstate);
|
|
||||||
for (i = 0; i < 4 * 8; i++)
|
|
||||||
pad[i] = ihash[i] ^ 0x36363636;
|
|
||||||
for (; i < 4 * 16; i++)
|
|
||||||
pad[i] = 0x36363636;
|
|
||||||
sha256_transform_4way(tstate, pad, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
|
|
||||||
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
|
|
||||||
{
|
|
||||||
uint32_t istate[4 * 8] __attribute__((aligned(16)));
|
|
||||||
uint32_t ostate2[4 * 8] __attribute__((aligned(16)));
|
|
||||||
uint32_t ibuf[4 * 16] __attribute__((aligned(16)));
|
|
||||||
uint32_t obuf[4 * 16] __attribute__((aligned(16)));
|
|
||||||
int i, j;
|
|
||||||
|
|
||||||
memcpy(istate, tstate, 4 * 32);
|
|
||||||
sha256_transform_4way(istate, salt, 0);
|
|
||||||
|
|
||||||
memcpy(ibuf, salt + 4 * 16, 4 * 16);
|
|
||||||
memcpy(ibuf + 4 * 5, innerpad_4way, 4 * 44);
|
|
||||||
memcpy(obuf + 4 * 8, outerpad_4way, 4 * 32);
|
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
|
||||||
memcpy(obuf, istate, 4 * 32);
|
|
||||||
ibuf[4 * 4 + 0] = i + 1;
|
|
||||||
ibuf[4 * 4 + 1] = i + 1;
|
|
||||||
ibuf[4 * 4 + 2] = i + 1;
|
|
||||||
ibuf[4 * 4 + 3] = i + 1;
|
|
||||||
sha256_transform_4way(obuf, ibuf, 0);
|
|
||||||
|
|
||||||
memcpy(ostate2, ostate, 4 * 32);
|
|
||||||
sha256_transform_4way(ostate2, obuf, 0);
|
|
||||||
for (j = 0; j < 4 * 8; j++)
|
|
||||||
output[4 * 8 * i + j] = swab32(ostate2[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
|
|
||||||
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
|
|
||||||
{
|
|
||||||
uint32_t buf[4 * 16] __attribute__((aligned(16)));
|
|
||||||
int i;
|
|
||||||
|
|
||||||
sha256_transform_4way(tstate, salt, 1);
|
|
||||||
sha256_transform_4way(tstate, salt + 4 * 16, 1);
|
|
||||||
sha256_transform_4way(tstate, finalblk_4way, 0);
|
|
||||||
memcpy(buf, tstate, 4 * 32);
|
|
||||||
memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);
|
|
||||||
|
|
||||||
sha256_transform_4way(ostate, buf, 0);
|
|
||||||
for (i = 0; i < 4 * 8; i++)
|
|
||||||
output[i] = swab32(ostate[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* HAVE_SHA256_4WAY */
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_8WAY
|
|
||||||
|
|
||||||
/*
 * Last inner-hash block used by PBKDF2_SHA256_128_32_8way, replicated across
 * eight interleaved lanes: a row of 0x00000001, a 0x80000000 marker row,
 * thirteen zero rows and a closing 0x00000620 row.  Kept 32-byte aligned
 * because it is handed directly to sha256_transform_8way().
 */
#define FINALBLK_ROW8(w) (w), (w), (w), (w), (w), (w), (w), (w)
static const uint32_t finalblk_8way[8 * 16] __attribute__((aligned(32))) = {
	FINALBLK_ROW8(0x00000001),
	FINALBLK_ROW8(0x80000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000000),
	FINALBLK_ROW8(0x00000620)
};
#undef FINALBLK_ROW8
|
|
||||||
|
|
||||||
/*
 * Set up the inner (tstate) and outer (ostate) HMAC-SHA256 states for eight
 * interleaved lanes.
 *
 * key    - interleaved key words; only the rows starting at key + 8 * 16 are
 *          read here.
 * tstate - in: midstate of the key's first block; out: inner-pad state.
 * ostate - out: outer-pad state.
 */
static inline void HMAC_SHA256_80_init_8way(const uint32_t *key,
	uint32_t *tstate, uint32_t *ostate)
{
	uint32_t keyhash[8 * 8] __attribute__((aligned(32)));
	uint32_t block[8 * 16] __attribute__((aligned(32)));
	int lane, w;

	/* tstate is assumed to contain the midstate of key */
	memcpy(block, key + 8 * 16, 8 * 16);
	/* Rows 5..14 are zero; row 4 is the marker, row 15 the trailing word. */
	memset(block + 8 * 5, 0x00, 8 * 40);
	for (lane = 0; lane < 8; lane++) {
		block[8 * 4 + lane] = 0x80000000;
		block[8 * 15 + lane] = 0x00000280;
	}
	sha256_transform_8way(tstate, block, 0);
	memcpy(keyhash, tstate, 8 * 32);

	/* Outer state: hashed key XOR 0x5c, remaining rows are plain 0x5c. */
	sha256_init_8way(ostate);
	for (w = 0; w < 8 * 16; w++) {
		if (w < 8 * 8)
			block[w] = keyhash[w] ^ 0x5c5c5c5c;
		else
			block[w] = 0x5c5c5c5c;
	}
	sha256_transform_8way(ostate, block, 0);

	/* Inner state: same layout with the 0x36 pad byte. */
	sha256_init_8way(tstate);
	for (w = 0; w < 8 * 16; w++) {
		if (w < 8 * 8)
			block[w] = keyhash[w] ^ 0x36363636;
		else
			block[w] = 0x36363636;
	}
	sha256_transform_8way(tstate, block, 0);
}
|
|
||||||
|
|
||||||
/*
 * 8-way PBKDF2-HMAC-SHA256 producing four 8-row output blocks per lane
 * (8 * 32 interleaved words in total).
 *
 * tstate/ostate - inner and outer HMAC states (not modified).
 * salt          - interleaved salt; fully consumed before the first write to
 *                 output, so output may alias salt (callers in this file do).
 * output        - receives the byte-swapped digests, block after block.
 */
static inline void PBKDF2_SHA256_80_128_8way(const uint32_t *tstate,
	const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
	uint32_t inner_mid[8 * 8] __attribute__((aligned(32)));
	uint32_t outer[8 * 8] __attribute__((aligned(32)));
	uint32_t iblock[8 * 16] __attribute__((aligned(32)));
	uint32_t oblock[8 * 16] __attribute__((aligned(32)));
	int blk, lane, w;

	/* Absorb the first salt block once; it is shared by every output block. */
	memcpy(inner_mid, tstate, 8 * 32);
	sha256_transform_8way(inner_mid, salt, 0);

	/*
	 * Second inner block: rows 0..3 salt tail, row 4 block counter (set in
	 * the loop), row 5 marker, rows 6..14 zero, row 15 trailing word.
	 */
	memcpy(iblock, salt + 8 * 16, 8 * 16);
	memset(iblock + 8 * 6, 0x00, 8 * 36);
	/* Outer block: rows 0..7 digest (per iteration), then fixed padding. */
	memset(oblock + 8 * 9, 0x00, 8 * 24);
	for (lane = 0; lane < 8; lane++) {
		iblock[8 * 5 + lane] = 0x80000000;
		iblock[8 * 15 + lane] = 0x000004a0;
		oblock[8 * 8 + lane] = 0x80000000;
		oblock[8 * 15 + lane] = 0x00000300;
	}

	for (blk = 1; blk <= 4; blk++) {
		uint32_t *dst = output + 8 * 8 * (blk - 1);

		memcpy(oblock, inner_mid, 8 * 32);
		for (lane = 0; lane < 8; lane++)
			iblock[8 * 4 + lane] = blk;
		sha256_transform_8way(oblock, iblock, 0);

		memcpy(outer, ostate, 8 * 32);
		sha256_transform_8way(outer, oblock, 0);
		for (w = 0; w < 8 * 8; w++)
			dst[w] = swab32(outer[w]);
	}
}
|
|
||||||
|
|
||||||
static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
|
|
||||||
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
|
|
||||||
{
|
|
||||||
uint32_t buf[8 * 16] __attribute__((aligned(32)));
|
|
||||||
int i;
|
|
||||||
|
|
||||||
sha256_transform_8way(tstate, salt, 1);
|
|
||||||
sha256_transform_8way(tstate, salt + 8 * 16, 1);
|
|
||||||
sha256_transform_8way(tstate, finalblk_8way, 0);
|
|
||||||
|
|
||||||
memcpy(buf, tstate, 8 * 32);
|
|
||||||
for (i = 0; i < 8; i++)
|
|
||||||
buf[8 * 8 + i] = 0x80000000;
|
|
||||||
memset(buf + 8 * 9, 0x00, 8 * 24);
|
|
||||||
for (i = 0; i < 8; i++)
|
|
||||||
buf[8 * 15 + i] = 0x00000300;
|
|
||||||
sha256_transform_8way(ostate, buf, 0);
|
|
||||||
|
|
||||||
for (i = 0; i < 8 * 8; i++)
|
|
||||||
output[i] = swab32(ostate[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* HAVE_SHA256_8WAY */
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Per-architecture configuration.  Each branch declares the scrypt_core
 * variants that are defined outside this file (no bodies are given here) and
 * sets SCRYPT_MAX_WAYS, which sizes the scratchpad and the per-call data/hash
 * arrays in scanhash_scrypt().  Architectures not listed fall through to the
 * portable C implementation in the #else branch.
 */
#if defined(__x86_64__)

/* x86-64: 3-way core available; throughput is queried at run time. */
#define SCRYPT_MAX_WAYS 12
#define HAVE_SCRYPT_3WAY 1
int scrypt_best_throughput();
void scrypt_core(uint32_t *X, uint32_t *V);
void scrypt_core_3way(uint32_t *X, uint32_t *V);
#if defined(USE_AVX2)
/* AVX2 builds additionally get a 6-way core and a larger way limit. */
#undef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 24
#define HAVE_SCRYPT_6WAY 1
void scrypt_core_6way(uint32_t *X, uint32_t *V);
#endif

#elif defined(__i386__)

/* i386: single-way core; room for 4 ways is reserved for the 4-way SHA path. */
#define SCRYPT_MAX_WAYS 4
#define scrypt_best_throughput() 1
void scrypt_core(uint32_t *X, uint32_t *V);

#elif defined(__arm__) && defined(__APCS_32__)

void scrypt_core(uint32_t *X, uint32_t *V);
#if defined(__ARM_NEON__)
/* NEON: fixed 3-way scrypt; the 4-way SHA-256 path is disabled here. */
#undef HAVE_SHA256_4WAY
#define SCRYPT_MAX_WAYS 3
#define HAVE_SCRYPT_3WAY 1
#define scrypt_best_throughput() 3
void scrypt_core_3way(uint32_t *X, uint32_t *V);
#endif
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
/*
 * XOR Bx into B, run eight Salsa20 rounds (four column/row double rounds)
 * over the result, and add the permuted words back into B.  Bx is only read.
 */
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
	uint32_t x[16];
	int w, pass;

	for (w = 0; w < 16; w++)
		x[w] = (B[w] ^= Bx[w]);

#define SALSA_ROTL(v, c) (((v) << (c)) | ((v) >> (32 - (c))))
/* One quarter round; the four quarters of a step touch disjoint words. */
#define SALSA_QUARTER(a, b, c, d) \
	do { \
		x[b] ^= SALSA_ROTL(x[a] + x[d], 7); \
		x[c] ^= SALSA_ROTL(x[b] + x[a], 9); \
		x[d] ^= SALSA_ROTL(x[c] + x[b], 13); \
		x[a] ^= SALSA_ROTL(x[d] + x[c], 18); \
	} while (0)

	for (pass = 0; pass < 4; pass++) {
		/* Operate on columns. */
		SALSA_QUARTER( 0,  4,  8, 12);
		SALSA_QUARTER( 5,  9, 13,  1);
		SALSA_QUARTER(10, 14,  2,  6);
		SALSA_QUARTER(15,  3,  7, 11);

		/* Operate on rows. */
		SALSA_QUARTER( 0,  1,  2,  3);
		SALSA_QUARTER( 5,  6,  7,  4);
		SALSA_QUARTER(10, 11,  8,  9);
		SALSA_QUARTER(15, 12, 13, 14);
	}

#undef SALSA_QUARTER
#undef SALSA_ROTL

	for (w = 0; w < 16; w++)
		B[w] += x[w];
}
|
|
||||||
|
|
||||||
/*
 * Portable scrypt core for a single 32-word state X.
 *
 * Phase 1 stores 1024 successive states into V (32 words each) while mixing
 * X with two xor_salsa8 calls per step.  Phase 2 runs 1024 further steps,
 * each XORing in the stored state selected by the low 10 bits of X[16]
 * before mixing again.  V must hold at least 1024 * 32 words.
 */
static inline void scrypt_core(uint32_t *X, uint32_t *V)
{
	uint32_t step, w;
	uint32_t *slot = V;

	for (step = 0; step < 1024; step++, slot += 32) {
		memcpy(slot, X, 128);
		xor_salsa8(X, X + 16);
		xor_salsa8(X + 16, X);
	}

	for (step = 0; step < 1024; step++) {
		const uint32_t *picked = V + 32 * (X[16] & 1023);

		for (w = 0; w < 32; w++)
			X[w] ^= picked[w];
		xor_salsa8(X, X + 16);
		xor_salsa8(X + 16, X);
	}
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Fallback when no architecture branch above configured multi-way support. */
#ifndef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 1
#define scrypt_best_throughput() 1
#endif

/*
 * Scratchpad size in bytes: 128 KiB (1024 states of 128 bytes) per way, plus
 * 63 bytes of slack so users can round the pointer up to a 64-byte boundary.
 */
#define SCRYPT_BUFFER_SIZE (SCRYPT_MAX_WAYS * 131072 + 63)

/*
 * Allocate one scrypt scratchpad of SCRYPT_BUFFER_SIZE bytes.
 *
 * Returns the raw malloc() pointer, or NULL if the allocation fails; the
 * buffer is not aligned here.  The function takes no arguments, which is now
 * spelled out with an explicit (void) prototype.
 */
unsigned char *scrypt_buffer_alloc(void)
{
	return malloc(SCRYPT_BUFFER_SIZE);
}
|
|
||||||
|
|
||||||
/*
 * Single-way scrypt hash of one 20-word input.
 *
 * input      - 20 words to hash.
 * output     - receives the 8-word result.
 * midstate   - SHA-256 state after the first input block (8 words, read only).
 * scratchpad - working memory; rounded up to a 64-byte boundary before use.
 */
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
	uint32_t *midstate, unsigned char *scratchpad)
{
	uint32_t inner[8], outer[8];
	uint32_t mix[32];
	uintptr_t base = (uintptr_t)(scratchpad);
	uint32_t *V = (uint32_t *)((base + 63) & ~ (uintptr_t)(63));

	memcpy(inner, midstate, 32);
	HMAC_SHA256_80_init(input, inner, outer);
	PBKDF2_SHA256_80_128(inner, outer, input, mix);

	scrypt_core(mix, V);

	PBKDF2_SHA256_128_32(inner, outer, mix, output);
}
|
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_4WAY
|
|
||||||
/*
 * Four scrypt hashes per call, with the SHA-256 stages run 4-way.
 *
 * input holds four consecutive 20-word inputs and output receives four
 * consecutive 8-word results.  Data is interleaved word-by-word (W) for the
 * 4-way SHA helpers and laid out lane-by-lane (X) for scrypt_core, which is
 * run once per lane on the same scratchpad.
 */
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
	uint32_t tstate[4 * 8] __attribute__((aligned(128)));
	uint32_t ostate[4 * 8] __attribute__((aligned(128)));
	uint32_t W[4 * 32] __attribute__((aligned(128)));
	uint32_t X[4 * 32] __attribute__((aligned(128)));
	uint32_t *V;
	int lane, w;

	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Interleave the inputs and broadcast the shared midstate. */
	for (lane = 0; lane < 4; lane++) {
		for (w = 0; w < 20; w++)
			W[4 * w + lane] = input[lane * 20 + w];
		for (w = 0; w < 8; w++)
			tstate[4 * w + lane] = midstate[w];
	}

	HMAC_SHA256_80_init_4way(W, tstate, ostate);
	PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);

	/* De-interleave for the scalar core. */
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 32; w++)
			X[lane * 32 + w] = W[4 * w + lane];

	for (lane = 0; lane < 4; lane++)
		scrypt_core(X + lane * 32, V);

	/* Re-interleave for the final PBKDF2 step. */
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 32; w++)
			W[4 * w + lane] = X[lane * 32 + w];

	PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);

	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 8; w++)
			output[lane * 8 + w] = W[4 * w + lane];
}
|
|
||||||
#endif /* HAVE_SHA256_4WAY */
|
|
||||||
|
|
||||||
#ifdef HAVE_SCRYPT_3WAY
|
|
||||||
|
|
||||||
/*
 * Three scrypt hashes per call: scalar SHA-256 stages per lane around one
 * scrypt_core_3way() call.
 *
 * input holds three consecutive 20-word inputs and output receives three
 * consecutive 8-word results; midstate is shared by all lanes.
 */
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
	uint32_t tstate[3 * 8], ostate[3 * 8];
	uint32_t X[3 * 32] __attribute__((aligned(64)));
	uint32_t *V;
	int lane;

	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	for (lane = 0; lane < 3; lane++)
		memcpy(tstate + 8 * lane, midstate, 32);
	for (lane = 0; lane < 3; lane++)
		HMAC_SHA256_80_init(input + 20 * lane, tstate + 8 * lane,
			ostate + 8 * lane);
	for (lane = 0; lane < 3; lane++)
		PBKDF2_SHA256_80_128(tstate + 8 * lane, ostate + 8 * lane,
			input + 20 * lane, X + 32 * lane);

	scrypt_core_3way(X, V);

	for (lane = 0; lane < 3; lane++)
		PBKDF2_SHA256_128_32(tstate + 8 * lane, ostate + 8 * lane,
			X + 32 * lane, output + 8 * lane);
}
|
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_4WAY
|
|
||||||
/*
 * Twelve scrypt hashes per call: three groups of 4-way SHA-256 around four
 * scrypt_core_3way() calls.
 *
 * input holds twelve consecutive 20-word inputs and output receives twelve
 * consecutive 8-word results.  W keeps each group of four lanes interleaved
 * word-by-word (128 words per group); X keeps the twelve lanes back to back
 * (32 words each).
 */
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
	uint32_t tstate[12 * 8] __attribute__((aligned(128)));
	uint32_t ostate[12 * 8] __attribute__((aligned(128)));
	uint32_t W[12 * 32] __attribute__((aligned(128)));
	uint32_t X[12 * 32] __attribute__((aligned(128)));
	uint32_t *V;
	int grp, lane, w;

	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Interleave the inputs and broadcast the shared midstate. */
	for (grp = 0; grp < 3; grp++) {
		for (lane = 0; lane < 4; lane++) {
			for (w = 0; w < 20; w++)
				W[128 * grp + 4 * w + lane] =
					input[80 * grp + lane * 20 + w];
			for (w = 0; w < 8; w++)
				tstate[32 * grp + 4 * w + lane] = midstate[w];
		}
	}

	for (grp = 0; grp < 3; grp++)
		HMAC_SHA256_80_init_4way(W + 128 * grp, tstate + 32 * grp,
			ostate + 32 * grp);
	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_80_128_4way(tstate + 32 * grp, ostate + 32 * grp,
			W + 128 * grp, W + 128 * grp);

	/* De-interleave into per-lane states for the core. */
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 32; w++)
				X[128 * grp + lane * 32 + w] =
					W[128 * grp + 4 * w + lane];

	/* Four passes of three lanes each, reusing the same scratchpad. */
	for (grp = 0; grp < 4; grp++)
		scrypt_core_3way(X + grp * 96, V);

	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 32; w++)
				W[128 * grp + 4 * w + lane] =
					X[128 * grp + lane * 32 + w];

	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_128_32_4way(tstate + 32 * grp, ostate + 32 * grp,
			W + 128 * grp, W + 128 * grp);

	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 8; w++)
				output[32 * grp + lane * 8 + w] =
					W[128 * grp + 4 * w + lane];
}
|
|
||||||
#endif /* HAVE_SHA256_4WAY */
|
|
||||||
|
|
||||||
#endif /* HAVE_SCRYPT_3WAY */
|
|
||||||
|
|
||||||
#ifdef HAVE_SCRYPT_6WAY
|
|
||||||
/*
 * Twenty-four scrypt hashes per call: three groups of 8-way SHA-256 around
 * four scrypt_core_6way() calls.
 *
 * input holds 24 consecutive 20-word inputs and output receives 24
 * consecutive 8-word results.  W keeps each group of eight lanes interleaved
 * word-by-word (256 words per group); X keeps the 24 lanes back to back
 * (32 words each).
 */
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
	uint32_t tstate[24 * 8] __attribute__((aligned(128)));
	uint32_t ostate[24 * 8] __attribute__((aligned(128)));
	uint32_t W[24 * 32] __attribute__((aligned(128)));
	uint32_t X[24 * 32] __attribute__((aligned(128)));
	uint32_t *V;
	int grp, lane, w;

	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Interleave the inputs and broadcast the shared midstate. */
	for (grp = 0; grp < 3; grp++) {
		for (lane = 0; lane < 8; lane++) {
			for (w = 0; w < 20; w++)
				W[8 * 32 * grp + 8 * w + lane] =
					input[8 * 20 * grp + lane * 20 + w];
			for (w = 0; w < 8; w++)
				tstate[8 * 8 * grp + 8 * w + lane] = midstate[w];
		}
	}

	for (grp = 0; grp < 3; grp++)
		HMAC_SHA256_80_init_8way(W + 256 * grp, tstate + 64 * grp,
			ostate + 64 * grp);
	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_80_128_8way(tstate + 64 * grp, ostate + 64 * grp,
			W + 256 * grp, W + 256 * grp);

	/* De-interleave into per-lane states for the core. */
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 8; lane++)
			for (w = 0; w < 32; w++)
				X[8 * 32 * grp + lane * 32 + w] =
					W[8 * 32 * grp + 8 * w + lane];

	/* Four passes of six lanes each, reusing the same scratchpad. */
	for (grp = 0; grp < 4; grp++)
		scrypt_core_6way(X + grp * 6 * 32, V);

	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 8; lane++)
			for (w = 0; w < 32; w++)
				W[8 * 32 * grp + 8 * w + lane] =
					X[8 * 32 * grp + lane * 32 + w];

	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_128_32_8way(tstate + 64 * grp, ostate + 64 * grp,
			W + 256 * grp, W + 256 * grp);

	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 8; lane++)
			for (w = 0; w < 8; w++)
				output[8 * 8 * grp + lane * 8 + w] =
					W[8 * 32 * grp + 8 * w + lane];
}
|
|
||||||
#endif /* HAVE_SCRYPT_6WAY */
|
|
||||||
|
|
||||||
int scanhash_scrypt(int thr_id, uint32_t *pdata,
|
|
||||||
unsigned char *scratchbuf, const uint32_t *ptarget,
|
|
||||||
uint32_t max_nonce, unsigned long *hashes_done)
|
|
||||||
{
|
|
||||||
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
|
|
||||||
uint32_t midstate[8];
|
|
||||||
uint32_t n = pdata[19] - 1;
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
int throughput = scrypt_best_throughput();
|
|
||||||
int i;
|
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_4WAY
|
|
||||||
if (sha256_use_4way())
|
|
||||||
throughput *= 4;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for (i = 0; i < throughput; i++)
|
|
||||||
memcpy(data + i * 20, pdata, 80);
|
|
||||||
|
|
||||||
sha256_init(midstate);
|
|
||||||
sha256_transform(midstate, data, 0);
|
|
||||||
|
|
||||||
do {
|
|
||||||
for (i = 0; i < throughput; i++)
|
|
||||||
data[i * 20 + 19] = ++n;
|
|
||||||
|
|
||||||
#if defined(HAVE_SHA256_4WAY)
|
|
||||||
if (throughput == 4)
|
|
||||||
scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf);
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
|
|
||||||
if (throughput == 12)
|
|
||||||
scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf);
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
#if defined(HAVE_SCRYPT_6WAY)
|
|
||||||
if (throughput == 24)
|
|
||||||
scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf);
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
#if defined(HAVE_SCRYPT_3WAY)
|
|
||||||
if (throughput == 3)
|
|
||||||
scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf);
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
scrypt_1024_1_1_256(data, hash, midstate, scratchbuf);
|
|
||||||
|
|
||||||
for (i = 0; i < throughput; i++) {
|
|
||||||
if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) {
|
|
||||||
*hashes_done = n - pdata[19] + 1;
|
|
||||||
pdata[19] = data[i * 20 + 19];
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
|
||||||
|
|
||||||
*hashes_done = n - pdata[19] + 1;
|
|
||||||
pdata[19] = n;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Install the scrypt entry points into the algorithm gate.
 *
 * Sets gate->scanhash and gate->hash; always returns true.  The get_max64
 * hook is intentionally left at whatever the gate already holds (the
 * assignment below was disabled in the original).
 */
bool register_scrypt_algo( algo_gate_t* gate )
{
	gate->scanhash = &scanhash_scrypt;
	gate->hash = &scrypt_hash;
//	gate->get_max64 = scrypt_get_max64;
	return true;
}
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,630 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2011 ArtForz
|
|
||||||
* Copyright 2011-2013 pooler
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
|
||||||
* under the terms of the GNU General Public License as published by the Free
|
|
||||||
* Software Foundation; either version 2 of the License, or (at your option)
|
|
||||||
* any later version. See COPYING for more details.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "../cpuminer-config.h"
|
|
||||||
#include "../miner.h"
|
|
||||||
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#if defined(__arm__) && defined(__APCS_32__)
|
|
||||||
#define EXTERN_SHA256
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Initial SHA-256 state, copied into a fresh state by sha256_init(). */
static const uint32_t sha256_h[8] = {
	0x6a09e667,
	0xbb67ae85,
	0x3c6ef372,
	0xa54ff53a,
	0x510e527f,
	0x9b05688c,
	0x1f83d9ab,
	0x5be0cd19
};
|
|
||||||
|
|
||||||
/* SHA-256 round constants; entry i is added to W[i] in round i (see RNDr). */
static const uint32_t sha256_k[64] = {
	/*  0.. 7 */
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	/*  8..15 */
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	/* 16..23 */
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	/* 24..31 */
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	/* 32..39 */
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	/* 40..47 */
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	/* 48..55 */
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	/* 56..63 */
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
|
|
||||||
|
|
||||||
void sha256_init(uint32_t *state)
|
|
||||||
{
|
|
||||||
memcpy(state, sha256_h, 32);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Elementary functions used by SHA256.
 *
 * Every macro argument is parenthesized so that expression arguments (for
 * example ROTR(a | b, n)) are evaluated as a unit.  Arguments are expanded
 * more than once, so they must be free of side effects.
 */
#define Ch(x, y, z)     (((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)    (((x) & ((y) | (z))) | ((y) & (z)))
#define ROTR(x, n)      (((x) >> (n)) | ((x) << (32 - (n))))
#define S0(x)           (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x)           (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x)           (ROTR(x, 7) ^ ROTR(x, 18) ^ ((x) >> 3))
#define s1(x)           (ROTR(x, 17) ^ ROTR(x, 19) ^ ((x) >> 10))

/*
 * SHA256 round function.  Expects uint32_t temporaries t0 and t1 to be
 * declared in the calling scope; d and h must be lvalues.
 */
#define RND(a, b, c, d, e, f, g, h, k) \
	do { \
		t0 = (h) + S1(e) + Ch(e, f, g) + (k); \
		t1 = S0(a) + Maj(a, b, c); \
		(d) += t0; \
		(h) = t0 + t1; \
	} while (0)

/*
 * Adjusted round function for rotating state: instead of shuffling the
 * eight working variables after each round, the indices into S rotate
 * with the round number i.
 */
#define RNDr(S, W, i) \
	RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8], \
	    (S)[(66 - (i)) % 8], (S)[(67 - (i)) % 8], \
	    (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8], \
	    (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8], \
	    (W)[i] + sha256_k[i])
|
|
||||||
|
|
||||||
#ifndef EXTERN_SHA256
|
|
||||||
|
|
||||||
/*
 * SHA256 block compression function.
 *
 * state - 8-word chaining value, updated in place.
 * block - 16-word (512-bit) input block.
 * swap  - non-zero to pass each input word through swab32() first.
 */
void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
{
	uint32_t W[64];
	uint32_t S[8];
	uint32_t t0, t1;
	int i;

	/* 1. Prepare message schedule W. */
	if (!swap) {
		memcpy(W, block, 64);
	} else {
		for (i = 0; i < 16; i++)
			W[i] = swab32(block[i]);
	}
	for (i = 16; i < 64; i++)
		W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];

	/* 2. Initialize working variables. */
	memcpy(S, state, 32);

	/* 3. Mix: 64 rounds, kept fully unrolled. */
	RNDr(S, W,  0); RNDr(S, W,  1); RNDr(S, W,  2); RNDr(S, W,  3);
	RNDr(S, W,  4); RNDr(S, W,  5); RNDr(S, W,  6); RNDr(S, W,  7);
	RNDr(S, W,  8); RNDr(S, W,  9); RNDr(S, W, 10); RNDr(S, W, 11);
	RNDr(S, W, 12); RNDr(S, W, 13); RNDr(S, W, 14); RNDr(S, W, 15);
	RNDr(S, W, 16); RNDr(S, W, 17); RNDr(S, W, 18); RNDr(S, W, 19);
	RNDr(S, W, 20); RNDr(S, W, 21); RNDr(S, W, 22); RNDr(S, W, 23);
	RNDr(S, W, 24); RNDr(S, W, 25); RNDr(S, W, 26); RNDr(S, W, 27);
	RNDr(S, W, 28); RNDr(S, W, 29); RNDr(S, W, 30); RNDr(S, W, 31);
	RNDr(S, W, 32); RNDr(S, W, 33); RNDr(S, W, 34); RNDr(S, W, 35);
	RNDr(S, W, 36); RNDr(S, W, 37); RNDr(S, W, 38); RNDr(S, W, 39);
	RNDr(S, W, 40); RNDr(S, W, 41); RNDr(S, W, 42); RNDr(S, W, 43);
	RNDr(S, W, 44); RNDr(S, W, 45); RNDr(S, W, 46); RNDr(S, W, 47);
	RNDr(S, W, 48); RNDr(S, W, 49); RNDr(S, W, 50); RNDr(S, W, 51);
	RNDr(S, W, 52); RNDr(S, W, 53); RNDr(S, W, 54); RNDr(S, W, 55);
	RNDr(S, W, 56); RNDr(S, W, 57); RNDr(S, W, 58); RNDr(S, W, 59);
	RNDr(S, W, 60); RNDr(S, W, 61); RNDr(S, W, 62); RNDr(S, W, 63);

	/* 4. Mix local working variables into global state */
	for (i = 0; i < 8; i++)
		state[i] += S[i];
}
|
|
||||||
|
|
||||||
#endif /* EXTERN_SHA256 */
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Template for the second SHA-256 pass of sha256d.  Only words 8..15 are
 * consumed by the code in this file: they are copied behind an 8-word digest
 * to complete a 16-word block (0x80000000 marker, zeros, trailing 0x100).
 */
static const uint32_t sha256d_hash1[16] = {
	[8]  = 0x80000000,
	[15] = 0x00000100
};
|
|
||||||
|
|
||||||
static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
|
|
||||||
{
|
|
||||||
uint32_t S[16];
|
|
||||||
int i;
|
|
||||||
|
|
||||||
sha256_init(S);
|
|
||||||
sha256_transform(S, data, 0);
|
|
||||||
sha256_transform(S, data + 16, 0);
|
|
||||||
memcpy(S + 8, sha256d_hash1 + 8, 32);
|
|
||||||
sha256_init(hash);
|
|
||||||
sha256_transform(hash, S, 0);
|
|
||||||
for (i = 0; i < 8; i++)
|
|
||||||
hash[i] = swab32(hash[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
void sha256d(unsigned char *hash, const unsigned char *data, int len)
|
|
||||||
{
|
|
||||||
uint32_t S[16], T[16];
|
|
||||||
int i, r;
|
|
||||||
|
|
||||||
sha256_init(S);
|
|
||||||
for (r = len; r > -9; r -= 64) {
|
|
||||||
if (r < 64)
|
|
||||||
memset(T, 0, 64);
|
|
||||||
memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
|
|
||||||
if (r >= 0 && r < 64)
|
|
||||||
((unsigned char *)T)[r] = 0x80;
|
|
||||||
for (i = 0; i < 16; i++)
|
|
||||||
T[i] = be32dec(T + i);
|
|
||||||
if (r < 56)
|
|
||||||
T[15] = 8 * len;
|
|
||||||
sha256_transform(S, T, 0);
|
|
||||||
}
|
|
||||||
memcpy(S + 8, sha256d_hash1 + 8, 32);
|
|
||||||
sha256_init(T);
|
|
||||||
sha256_transform(T, S, 0);
|
|
||||||
for (i = 0; i < 8; i++)
|
|
||||||
be32enc((uint32_t *)hash + i, T[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Precompute the parts of the message schedule W[16..31] that do not depend
 * on W[3] (presumably the nonce word -- confirm against the caller).
 *
 * Compared with the full recurrence
 *   W[i] = s1(W[i-2]) + W[i-7] + s0(W[i-15]) + W[i-16]
 * the entries below deliberately omit every term that involves W[3] or a
 * schedule word derived from it; sha256d_ms() adds those terms back later.
 * W must have room for at least 32 words, with W[0..15] already filled in.
 */
static inline void sha256d_preextend(uint32_t *W)
{
	W[16] = s1(W[14]) + W[ 9] + s0(W[ 1]) + W[ 0];
	W[17] = s1(W[15]) + W[10] + s0(W[ 2]) + W[ 1];
	/* s0(W[3]) omitted */
	W[18] = s1(W[16]) + W[11]             + W[ 2];
	/* W[3] omitted */
	W[19] = s1(W[17]) + W[12] + s0(W[ 4]);
	/* From here on the s1() terms depend on W[3] and are left out. */
	W[20] =             W[13] + s0(W[ 5]) + W[ 4];
	W[21] =             W[14] + s0(W[ 6]) + W[ 5];
	W[22] =             W[15] + s0(W[ 7]) + W[ 6];
	W[23] =             W[16] + s0(W[ 8]) + W[ 7];
	W[24] =             W[17] + s0(W[ 9]) + W[ 8];
	W[25] =                     s0(W[10]) + W[ 9];
	W[26] =                     s0(W[11]) + W[10];
	W[27] =                     s0(W[12]) + W[11];
	W[28] =                     s0(W[13]) + W[12];
	W[29] =                     s0(W[14]) + W[13];
	W[30] =                     s0(W[15]) + W[14];
	W[31] =                     s0(W[16]) + W[15];
}
|
|
||||||
|
|
||||||
/*
 * Apply RNDr for indices 0, 1 and 2 to the state S using schedule W.
 *
 * These are the steps that come before index 3, where sha256d_ms() resumes,
 * so the scanners run them once and pass the result on as "prehash".
 *
 * S - 8-word working state, updated in place
 * W - message schedule; indices 0..2 are passed to RNDr
 */
static inline void sha256d_prehash(uint32_t *S, const uint32_t *W)
{
	/*
	 * Not referenced directly below; presumably scratch variables that
	 * the RNDr macro (defined elsewhere) expects under these names.
	 */
	uint32_t t0;
	uint32_t t1;

	RNDr(S, W, 0); RNDr(S, W, 1); RNDr(S, W, 2);
}
|
|
||||||
|
|
||||||
#ifdef EXTERN_SHA256
|
|
||||||
|
|
||||||
void sha256d_ms(uint32_t *hash, uint32_t *W,
|
|
||||||
const uint32_t *midstate, const uint32_t *prehash);
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
|
|
||||||
const uint32_t *midstate, const uint32_t *prehash)
|
|
||||||
{
|
|
||||||
uint32_t S[64];
|
|
||||||
uint32_t t0, t1;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
S[18] = W[18];
|
|
||||||
S[19] = W[19];
|
|
||||||
S[20] = W[20];
|
|
||||||
S[22] = W[22];
|
|
||||||
S[23] = W[23];
|
|
||||||
S[24] = W[24];
|
|
||||||
S[30] = W[30];
|
|
||||||
S[31] = W[31];
|
|
||||||
|
|
||||||
W[18] += s0(W[3]);
|
|
||||||
W[19] += W[3];
|
|
||||||
W[20] += s1(W[18]);
|
|
||||||
W[21] = s1(W[19]);
|
|
||||||
W[22] += s1(W[20]);
|
|
||||||
W[23] += s1(W[21]);
|
|
||||||
W[24] += s1(W[22]);
|
|
||||||
W[25] = s1(W[23]) + W[18];
|
|
||||||
W[26] = s1(W[24]) + W[19];
|
|
||||||
W[27] = s1(W[25]) + W[20];
|
|
||||||
W[28] = s1(W[26]) + W[21];
|
|
||||||
W[29] = s1(W[27]) + W[22];
|
|
||||||
W[30] += s1(W[28]) + W[23];
|
|
||||||
W[31] += s1(W[29]) + W[24];
|
|
||||||
for (i = 32; i < 64; i += 2) {
|
|
||||||
W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
|
|
||||||
W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(S, prehash, 32);
|
|
||||||
|
|
||||||
RNDr(S, W, 3);
|
|
||||||
RNDr(S, W, 4);
|
|
||||||
RNDr(S, W, 5);
|
|
||||||
RNDr(S, W, 6);
|
|
||||||
RNDr(S, W, 7);
|
|
||||||
RNDr(S, W, 8);
|
|
||||||
RNDr(S, W, 9);
|
|
||||||
RNDr(S, W, 10);
|
|
||||||
RNDr(S, W, 11);
|
|
||||||
RNDr(S, W, 12);
|
|
||||||
RNDr(S, W, 13);
|
|
||||||
RNDr(S, W, 14);
|
|
||||||
RNDr(S, W, 15);
|
|
||||||
RNDr(S, W, 16);
|
|
||||||
RNDr(S, W, 17);
|
|
||||||
RNDr(S, W, 18);
|
|
||||||
RNDr(S, W, 19);
|
|
||||||
RNDr(S, W, 20);
|
|
||||||
RNDr(S, W, 21);
|
|
||||||
RNDr(S, W, 22);
|
|
||||||
RNDr(S, W, 23);
|
|
||||||
RNDr(S, W, 24);
|
|
||||||
RNDr(S, W, 25);
|
|
||||||
RNDr(S, W, 26);
|
|
||||||
RNDr(S, W, 27);
|
|
||||||
RNDr(S, W, 28);
|
|
||||||
RNDr(S, W, 29);
|
|
||||||
RNDr(S, W, 30);
|
|
||||||
RNDr(S, W, 31);
|
|
||||||
RNDr(S, W, 32);
|
|
||||||
RNDr(S, W, 33);
|
|
||||||
RNDr(S, W, 34);
|
|
||||||
RNDr(S, W, 35);
|
|
||||||
RNDr(S, W, 36);
|
|
||||||
RNDr(S, W, 37);
|
|
||||||
RNDr(S, W, 38);
|
|
||||||
RNDr(S, W, 39);
|
|
||||||
RNDr(S, W, 40);
|
|
||||||
RNDr(S, W, 41);
|
|
||||||
RNDr(S, W, 42);
|
|
||||||
RNDr(S, W, 43);
|
|
||||||
RNDr(S, W, 44);
|
|
||||||
RNDr(S, W, 45);
|
|
||||||
RNDr(S, W, 46);
|
|
||||||
RNDr(S, W, 47);
|
|
||||||
RNDr(S, W, 48);
|
|
||||||
RNDr(S, W, 49);
|
|
||||||
RNDr(S, W, 50);
|
|
||||||
RNDr(S, W, 51);
|
|
||||||
RNDr(S, W, 52);
|
|
||||||
RNDr(S, W, 53);
|
|
||||||
RNDr(S, W, 54);
|
|
||||||
RNDr(S, W, 55);
|
|
||||||
RNDr(S, W, 56);
|
|
||||||
RNDr(S, W, 57);
|
|
||||||
RNDr(S, W, 58);
|
|
||||||
RNDr(S, W, 59);
|
|
||||||
RNDr(S, W, 60);
|
|
||||||
RNDr(S, W, 61);
|
|
||||||
RNDr(S, W, 62);
|
|
||||||
RNDr(S, W, 63);
|
|
||||||
|
|
||||||
for (i = 0; i < 8; i++)
|
|
||||||
S[i] += midstate[i];
|
|
||||||
|
|
||||||
W[18] = S[18];
|
|
||||||
W[19] = S[19];
|
|
||||||
W[20] = S[20];
|
|
||||||
W[22] = S[22];
|
|
||||||
W[23] = S[23];
|
|
||||||
W[24] = S[24];
|
|
||||||
W[30] = S[30];
|
|
||||||
W[31] = S[31];
|
|
||||||
|
|
||||||
memcpy(S + 8, sha256d_hash1 + 8, 32);
|
|
||||||
S[16] = s1(sha256d_hash1[14]) + sha256d_hash1[ 9] + s0(S[ 1]) + S[ 0];
|
|
||||||
S[17] = s1(sha256d_hash1[15]) + sha256d_hash1[10] + s0(S[ 2]) + S[ 1];
|
|
||||||
S[18] = s1(S[16]) + sha256d_hash1[11] + s0(S[ 3]) + S[ 2];
|
|
||||||
S[19] = s1(S[17]) + sha256d_hash1[12] + s0(S[ 4]) + S[ 3];
|
|
||||||
S[20] = s1(S[18]) + sha256d_hash1[13] + s0(S[ 5]) + S[ 4];
|
|
||||||
S[21] = s1(S[19]) + sha256d_hash1[14] + s0(S[ 6]) + S[ 5];
|
|
||||||
S[22] = s1(S[20]) + sha256d_hash1[15] + s0(S[ 7]) + S[ 6];
|
|
||||||
S[23] = s1(S[21]) + S[16] + s0(sha256d_hash1[ 8]) + S[ 7];
|
|
||||||
S[24] = s1(S[22]) + S[17] + s0(sha256d_hash1[ 9]) + sha256d_hash1[ 8];
|
|
||||||
S[25] = s1(S[23]) + S[18] + s0(sha256d_hash1[10]) + sha256d_hash1[ 9];
|
|
||||||
S[26] = s1(S[24]) + S[19] + s0(sha256d_hash1[11]) + sha256d_hash1[10];
|
|
||||||
S[27] = s1(S[25]) + S[20] + s0(sha256d_hash1[12]) + sha256d_hash1[11];
|
|
||||||
S[28] = s1(S[26]) + S[21] + s0(sha256d_hash1[13]) + sha256d_hash1[12];
|
|
||||||
S[29] = s1(S[27]) + S[22] + s0(sha256d_hash1[14]) + sha256d_hash1[13];
|
|
||||||
S[30] = s1(S[28]) + S[23] + s0(sha256d_hash1[15]) + sha256d_hash1[14];
|
|
||||||
S[31] = s1(S[29]) + S[24] + s0(S[16]) + sha256d_hash1[15];
|
|
||||||
for (i = 32; i < 60; i += 2) {
|
|
||||||
S[i] = s1(S[i - 2]) + S[i - 7] + s0(S[i - 15]) + S[i - 16];
|
|
||||||
S[i+1] = s1(S[i - 1]) + S[i - 6] + s0(S[i - 14]) + S[i - 15];
|
|
||||||
}
|
|
||||||
S[60] = s1(S[58]) + S[53] + s0(S[45]) + S[44];
|
|
||||||
|
|
||||||
sha256_init(hash);
|
|
||||||
|
|
||||||
RNDr(hash, S, 0);
|
|
||||||
RNDr(hash, S, 1);
|
|
||||||
RNDr(hash, S, 2);
|
|
||||||
RNDr(hash, S, 3);
|
|
||||||
RNDr(hash, S, 4);
|
|
||||||
RNDr(hash, S, 5);
|
|
||||||
RNDr(hash, S, 6);
|
|
||||||
RNDr(hash, S, 7);
|
|
||||||
RNDr(hash, S, 8);
|
|
||||||
RNDr(hash, S, 9);
|
|
||||||
RNDr(hash, S, 10);
|
|
||||||
RNDr(hash, S, 11);
|
|
||||||
RNDr(hash, S, 12);
|
|
||||||
RNDr(hash, S, 13);
|
|
||||||
RNDr(hash, S, 14);
|
|
||||||
RNDr(hash, S, 15);
|
|
||||||
RNDr(hash, S, 16);
|
|
||||||
RNDr(hash, S, 17);
|
|
||||||
RNDr(hash, S, 18);
|
|
||||||
RNDr(hash, S, 19);
|
|
||||||
RNDr(hash, S, 20);
|
|
||||||
RNDr(hash, S, 21);
|
|
||||||
RNDr(hash, S, 22);
|
|
||||||
RNDr(hash, S, 23);
|
|
||||||
RNDr(hash, S, 24);
|
|
||||||
RNDr(hash, S, 25);
|
|
||||||
RNDr(hash, S, 26);
|
|
||||||
RNDr(hash, S, 27);
|
|
||||||
RNDr(hash, S, 28);
|
|
||||||
RNDr(hash, S, 29);
|
|
||||||
RNDr(hash, S, 30);
|
|
||||||
RNDr(hash, S, 31);
|
|
||||||
RNDr(hash, S, 32);
|
|
||||||
RNDr(hash, S, 33);
|
|
||||||
RNDr(hash, S, 34);
|
|
||||||
RNDr(hash, S, 35);
|
|
||||||
RNDr(hash, S, 36);
|
|
||||||
RNDr(hash, S, 37);
|
|
||||||
RNDr(hash, S, 38);
|
|
||||||
RNDr(hash, S, 39);
|
|
||||||
RNDr(hash, S, 40);
|
|
||||||
RNDr(hash, S, 41);
|
|
||||||
RNDr(hash, S, 42);
|
|
||||||
RNDr(hash, S, 43);
|
|
||||||
RNDr(hash, S, 44);
|
|
||||||
RNDr(hash, S, 45);
|
|
||||||
RNDr(hash, S, 46);
|
|
||||||
RNDr(hash, S, 47);
|
|
||||||
RNDr(hash, S, 48);
|
|
||||||
RNDr(hash, S, 49);
|
|
||||||
RNDr(hash, S, 50);
|
|
||||||
RNDr(hash, S, 51);
|
|
||||||
RNDr(hash, S, 52);
|
|
||||||
RNDr(hash, S, 53);
|
|
||||||
RNDr(hash, S, 54);
|
|
||||||
RNDr(hash, S, 55);
|
|
||||||
RNDr(hash, S, 56);
|
|
||||||
|
|
||||||
hash[2] += hash[6] + S1(hash[3]) + Ch(hash[3], hash[4], hash[5])
|
|
||||||
+ S[57] + sha256_k[57];
|
|
||||||
hash[1] += hash[5] + S1(hash[2]) + Ch(hash[2], hash[3], hash[4])
|
|
||||||
+ S[58] + sha256_k[58];
|
|
||||||
hash[0] += hash[4] + S1(hash[1]) + Ch(hash[1], hash[2], hash[3])
|
|
||||||
+ S[59] + sha256_k[59];
|
|
||||||
hash[7] += hash[3] + S1(hash[0]) + Ch(hash[0], hash[1], hash[2])
|
|
||||||
+ S[60] + sha256_k[60]
|
|
||||||
+ sha256_h[7];
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* EXTERN_SHA256 */
|
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_4WAY
|
|
||||||
|
|
||||||
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
|
|
||||||
const uint32_t *midstate, const uint32_t *prehash);
|
|
||||||
|
|
||||||
static inline int scanhash_sha256d_4way(int thr_id, uint32_t *pdata,
|
|
||||||
const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
|
|
||||||
{
|
|
||||||
uint32_t data[4 * 64] __attribute__((aligned(128)));
|
|
||||||
uint32_t hash[4 * 8] __attribute__((aligned(32)));
|
|
||||||
uint32_t midstate[4 * 8] __attribute__((aligned(32)));
|
|
||||||
uint32_t prehash[4 * 8] __attribute__((aligned(32)));
|
|
||||||
uint32_t n = pdata[19] - 1;
|
|
||||||
const uint32_t first_nonce = pdata[19];
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
int i, j;
|
|
||||||
|
|
||||||
memcpy(data, pdata + 16, 64);
|
|
||||||
sha256d_preextend(data);
|
|
||||||
for (i = 31; i >= 0; i--)
|
|
||||||
for (j = 0; j < 4; j++)
|
|
||||||
data[i * 4 + j] = data[i];
|
|
||||||
|
|
||||||
sha256_init(midstate);
|
|
||||||
sha256_transform(midstate, pdata, 0);
|
|
||||||
memcpy(prehash, midstate, 32);
|
|
||||||
sha256d_prehash(prehash, pdata + 16);
|
|
||||||
for (i = 7; i >= 0; i--) {
|
|
||||||
for (j = 0; j < 4; j++) {
|
|
||||||
midstate[i * 4 + j] = midstate[i];
|
|
||||||
prehash[i * 4 + j] = prehash[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
for (i = 0; i < 4; i++)
|
|
||||||
data[4 * 3 + i] = ++n;
|
|
||||||
|
|
||||||
sha256d_ms_4way(hash, data, midstate, prehash);
|
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
|
||||||
if (swab32(hash[4 * 7 + i]) <= Htarg) {
|
|
||||||
pdata[19] = data[4 * 3 + i];
|
|
||||||
sha256d_80_swap(hash, pdata);
|
|
||||||
if (fulltest(hash, ptarget)) {
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
|
||||||
pdata[19] = n;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* HAVE_SHA256_4WAY */
|
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_8WAY
|
|
||||||
|
|
||||||
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
|
|
||||||
const uint32_t *midstate, const uint32_t *prehash);
|
|
||||||
|
|
||||||
static inline int scanhash_sha256d_8way(int thr_id, uint32_t *pdata,
|
|
||||||
const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
|
|
||||||
{
|
|
||||||
uint32_t data[8 * 64] __attribute__((aligned(128)));
|
|
||||||
uint32_t hash[8 * 8] __attribute__((aligned(32)));
|
|
||||||
uint32_t midstate[8 * 8] __attribute__((aligned(32)));
|
|
||||||
uint32_t prehash[8 * 8] __attribute__((aligned(32)));
|
|
||||||
uint32_t n = pdata[19] - 1;
|
|
||||||
const uint32_t first_nonce = pdata[19];
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
int i, j;
|
|
||||||
|
|
||||||
memcpy(data, pdata + 16, 64);
|
|
||||||
sha256d_preextend(data);
|
|
||||||
for (i = 31; i >= 0; i--)
|
|
||||||
for (j = 0; j < 8; j++)
|
|
||||||
data[i * 8 + j] = data[i];
|
|
||||||
|
|
||||||
sha256_init(midstate);
|
|
||||||
sha256_transform(midstate, pdata, 0);
|
|
||||||
memcpy(prehash, midstate, 32);
|
|
||||||
sha256d_prehash(prehash, pdata + 16);
|
|
||||||
for (i = 7; i >= 0; i--) {
|
|
||||||
for (j = 0; j < 8; j++) {
|
|
||||||
midstate[i * 8 + j] = midstate[i];
|
|
||||||
prehash[i * 8 + j] = prehash[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
for (i = 0; i < 8; i++)
|
|
||||||
data[8 * 3 + i] = ++n;
|
|
||||||
|
|
||||||
sha256d_ms_8way(hash, data, midstate, prehash);
|
|
||||||
|
|
||||||
for (i = 0; i < 8; i++) {
|
|
||||||
if (swab32(hash[8 * 7 + i]) <= Htarg) {
|
|
||||||
pdata[19] = data[8 * 3 + i];
|
|
||||||
sha256d_80_swap(hash, pdata);
|
|
||||||
if (fulltest(hash, ptarget)) {
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
|
||||||
pdata[19] = n;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* HAVE_SHA256_8WAY */
|
|
||||||
|
|
||||||
int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
|
|
||||||
uint32_t max_nonce, unsigned long *hashes_done)
|
|
||||||
{
|
|
||||||
uint32_t data[64] __attribute__((aligned(128)));
|
|
||||||
uint32_t hash[8] __attribute__((aligned(32)));
|
|
||||||
uint32_t midstate[8] __attribute__((aligned(32)));
|
|
||||||
uint32_t prehash[8] __attribute__((aligned(32)));
|
|
||||||
uint32_t n = pdata[19] - 1;
|
|
||||||
const uint32_t first_nonce = pdata[19];
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_8WAY
|
|
||||||
if (sha256_use_8way())
|
|
||||||
return scanhash_sha256d_8way(thr_id, pdata, ptarget,
|
|
||||||
max_nonce, hashes_done);
|
|
||||||
#endif
|
|
||||||
#ifdef HAVE_SHA256_4WAY
|
|
||||||
if (sha256_use_4way())
|
|
||||||
return scanhash_sha256d_4way(thr_id, pdata, ptarget,
|
|
||||||
max_nonce, hashes_done);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
memcpy(data, pdata + 16, 64);
|
|
||||||
sha256d_preextend(data);
|
|
||||||
|
|
||||||
sha256_init(midstate);
|
|
||||||
sha256_transform(midstate, pdata, 0);
|
|
||||||
memcpy(prehash, midstate, 32);
|
|
||||||
sha256d_prehash(prehash, pdata + 16);
|
|
||||||
|
|
||||||
do {
|
|
||||||
data[3] = ++n;
|
|
||||||
sha256d_ms(hash, data, midstate, prehash);
|
|
||||||
if (swab32(hash[7]) <= Htarg) {
|
|
||||||
pdata[19] = data[3];
|
|
||||||
sha256d_80_swap(hash, pdata);
|
|
||||||
if (fulltest(hash, ptarget)) {
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
|
||||||
pdata[19] = n;
|
|
||||||
return 0;
|
|
||||||
}
|
|
27
algo/zr5.c
27
algo/zr5.c
@@ -32,12 +32,10 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "algo/groestl/sph_groestl.h"
|
||||||
#include "algo/keccak/sph_keccak.h"
|
#include "algo/keccak/sph_keccak.h"
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
#ifndef NO_AES_NI
|
||||||
#include "algo/groestl/sse2/grso.h"
|
|
||||||
#include "algo/groestl/sse2/grso-macro.c"
|
|
||||||
#else
|
|
||||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||||
#include "algo/echo/aes_ni/hash_api.h"
|
#include "algo/echo/aes_ni/hash_api.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -61,17 +59,21 @@
|
|||||||
#define POK_DATA_MASK 0xFFFF0000
|
#define POK_DATA_MASK 0xFFFF0000
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
#ifndef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
hashState_groestl groestl;
|
sph_groestl512_context groestl;
|
||||||
|
#else
|
||||||
|
hashState_groestl groestl;
|
||||||
#endif
|
#endif
|
||||||
sph_keccak512_context keccak;
|
sph_keccak512_context keccak;
|
||||||
} zr5_ctx_holder;
|
} zr5_ctx_holder;
|
||||||
|
|
||||||
zr5_ctx_holder zr5_ctx;
|
zr5_ctx_holder zr5_ctx;
|
||||||
|
|
||||||
void init_zr5_ctx()
|
void init_zr5_ctx()
|
||||||
{
|
{
|
||||||
#ifndef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
|
sph_groestl512_init( &zr5_ctx.groestl );
|
||||||
|
#else
|
||||||
init_groestl( &zr5_ctx.groestl );
|
init_groestl( &zr5_ctx.groestl );
|
||||||
#endif
|
#endif
|
||||||
sph_keccak512_init(&zr5_ctx.keccak);
|
sph_keccak512_init(&zr5_ctx.keccak);
|
||||||
@@ -88,10 +90,6 @@ DATA_ALIGN16(sph_u64 hashctB);
|
|||||||
|
|
||||||
//memset(hash, 0, 128);
|
//memset(hash, 0, 128);
|
||||||
|
|
||||||
#ifdef NO_AES_NI
|
|
||||||
grsoState sts_grs;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static const int arrOrder[][4] =
|
static const int arrOrder[][4] =
|
||||||
{
|
{
|
||||||
{ 0, 1, 2, 3 }, { 0, 1, 3, 2 }, { 0, 2, 1, 3 }, { 0, 2, 3, 1 },
|
{ 0, 1, 2, 3 }, { 0, 1, 3, 2 }, { 0, 2, 1, 3 }, { 0, 2, 3, 1 },
|
||||||
@@ -123,9 +121,8 @@ static const int arrOrder[][4] =
|
|||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
#ifdef NO_AES_NI
|
#ifdef NO_AES_NI
|
||||||
{GRS_I;
|
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||||
GRS_U;
|
sph_groestl512_close(&ctx.groestl, hash);
|
||||||
GRS_C; }
|
|
||||||
#else
|
#else
|
||||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||||
final_groestl( &ctx.groestl, (char*)hash);
|
final_groestl( &ctx.groestl, (char*)hash);
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([cpuminer-opt], [3.4.12])
|
AC_INIT([cpuminer-opt], [3.5.0])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
|
10
cpu-miner.c
10
cpu-miner.c
@@ -2031,7 +2031,7 @@ bool jr2_stratum_handle_response( json_t *val )
|
|||||||
|
|
||||||
static bool stratum_handle_response( char *buf )
|
static bool stratum_handle_response( char *buf )
|
||||||
{
|
{
|
||||||
json_t *val, *res_val, *id_val;
|
json_t *val, *id_val;
|
||||||
json_error_t err;
|
json_error_t err;
|
||||||
bool ret = false;
|
bool ret = false;
|
||||||
|
|
||||||
@@ -2041,7 +2041,7 @@ static bool stratum_handle_response( char *buf )
|
|||||||
applog(LOG_INFO, "JSON decode failed(%d): %s", err.line, err.text);
|
applog(LOG_INFO, "JSON decode failed(%d): %s", err.line, err.text);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
res_val = json_object_get( val, "result" );
|
json_object_get( val, "result" );
|
||||||
id_val = json_object_get( val, "id" );
|
id_val = json_object_get( val, "id" );
|
||||||
if ( !id_val || json_is_null(id_val) )
|
if ( !id_val || json_is_null(id_val) )
|
||||||
goto out;
|
goto out;
|
||||||
@@ -2477,9 +2477,9 @@ void parse_arg(int key, char *arg )
|
|||||||
show_usage_and_exit(1);
|
show_usage_and_exit(1);
|
||||||
}
|
}
|
||||||
free(rpc_url);
|
free(rpc_url);
|
||||||
rpc_url = (char*) malloc(strlen(hp) + 8);
|
rpc_url = (char*) malloc( strlen(hp) + 15 );
|
||||||
sprintf(rpc_url, "http://%s", hp);
|
sprintf( rpc_url, "stratum+tcp://%s", hp );
|
||||||
short_url = &rpc_url[sizeof("http://")-1];
|
short_url = &rpc_url[ sizeof("stratum+tcp://") - 1 ];
|
||||||
}
|
}
|
||||||
have_stratum = !opt_benchmark && !strncasecmp(rpc_url, "stratum", 7);
|
have_stratum = !opt_benchmark && !strncasecmp(rpc_url, "stratum", 7);
|
||||||
break;
|
break;
|
||||||
|
1
miner.h
1
miner.h
@@ -331,6 +331,7 @@ bool has_sse();
|
|||||||
void cpu_bestcpu_feature( char *outbuf, size_t maxsz );
|
void cpu_bestcpu_feature( char *outbuf, size_t maxsz );
|
||||||
void cpu_getname(char *outbuf, size_t maxsz);
|
void cpu_getname(char *outbuf, size_t maxsz);
|
||||||
void cpu_getmodelid(char *outbuf, size_t maxsz);
|
void cpu_getmodelid(char *outbuf, size_t maxsz);
|
||||||
|
void cpu_brand_string( char* s );
|
||||||
|
|
||||||
float cpu_temp( int core );
|
float cpu_temp( int core );
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user