This commit is contained in:
Jay D Dee
2017-01-12 19:40:17 -05:00
parent 06f82c5b97
commit badc80f071
54 changed files with 410 additions and 25234 deletions

View File

@@ -77,8 +77,6 @@ cpuminer_SOURCES = \
algo/fresh.c \ algo/fresh.c \
algo/groestl/groestl.c \ algo/groestl/groestl.c \
algo/groestl/myr-groestl.c \ algo/groestl/myr-groestl.c \
algo/groestl/sse2/grso.c\
algo/groestl/sse2/grso-asm.c\
algo/groestl/aes_ni/hash-groestl.c \ algo/groestl/aes_ni/hash-groestl.c \
algo/groestl/aes_ni/hash-groestl256.c \ algo/groestl/aes_ni/hash-groestl256.c \
algo/haval/haval.c\ algo/haval/haval.c\

359
NEWS
View File

@@ -1,359 +0,0 @@
Version 1.2 (Tanguy Pruvot)
- Add cryptonight-light (Aeon)
- Add Lyra2REv2 algo (Vertcoin)
- Allow to load a remote config with curl
- Algorithm parameter is now case insensitive
- Drop anime algo (dead coin)
- Add Sib(coin) algo
- Compute and show network diff in pools too
- Checkup on arm, tested ok on Tegra K1 (CyanogenMod 12.1)
version 1.1 (Tanguy Pruvot)
- Add basic API remote control (quit/seturl)
- Add GroestlCoin, Diamond and Myriad variants
- Add Pluck algo and fix gbt query crash
- Add ZR5 algo (ZRC) and fix longpoll bug on linux
- Add Luffa algo
- Add Skein2 algo (Double Skein for Woodcoin)
- Add Animecoin algo (Quark variant)
- Add Dropcoin pok algo
- Add BMW-256 (MDT) algo
- Add Axiom algo
- Change some logged strings
- Use all cores by default, not N-1
- Handle a default config to run without params
- add cpu-priority and cpu-affinity options
- add NSIS installer script for windows setup
- Implement background option on windows
- add -m stratum option (diff-multiplier)
- Time limit to allow benchmarks or cron jobs
- Fix Cryptonight stratum support
- Allow to disable extranonce support
version 1.0.9 (Tanguy Pruvot)
- pool extranonce subscribe
- upgrade jansson
- lyra2 algo
- fix for solo mining
- API websocket support
Version 1.0.8 (Tanguy Pruvot)
- API Monitoring Support
- Enhance config values support (int/real/bool)
- Rewrite blake algo (speed x2)
Version 1.0.7 (Tanguy Pruvot)
- Add NIST5 and QUBIT algos
- Show current stratum bloc height
- Fix wallet solo mining
Version 1.0.6 (Tanguy Pruvot)
- Fix scrypt algo
- More work on VC2013
- Add -f tuning option to test with reduced difficulty
- Add S3 algo
Version 1.0.5 (Tanguy Pruvot)
- Merge remaining v2.4 cpu-miner changes
- Add colored output (disable with --no-color)
- Test and fix blake on NEOS, needs 14 rounds (was 8)
- Add pentablake (5x blake256) (from bitbandi)
- Add neoscrypt
- Windows (VC++ 2013 and MinGW64 build support)
- Enhance --version informations (compiler + lib versions)
Version 1.0.4 (Tanguy Pruvot)
- Add x13 x14 and x15 algos (Sherlockcoin, X14Coin, Webcoin..)
- Add scrypt:N variants (Vertcoin)
- Add fresh algo
- Fix thread khashes/s value output
- Add a configure option --disable-assembly
Version multi 1.0.3 (Lucas Jones)
- Add new algos :
x11 (Darkcoin [DRK], Hirocoin, Limecoin)
cryptonight (Bytecoin [BCN], Monero)
keccak (Maxcoin HelixCoin, CryptoMeth, Galleon, 365coin, Slothcoin, BitcointalkCoin)
hefty1 (Heavycoin)
quark (Quarkcoin)
skein (Skeincoin, Myriadcoin)
shavite3 (INKcoin)
blake (Blakecoin)
- See README.md
Version 2.4 - May 20, 2014
- Add support for the getblocktemplate RPC method (BIP 22)
- Allow tunnelling Stratum through HTTP proxies
- Add a --no-redirect option to ignore redirection requests
- Timeout for long polling is now disabled by default
- Fix CPU affinity on Linux (kiyominer)
- Add support for building under 64-bit Cygwin
- Expand version information with build details
Version 2.3.3 - Feb 27, 2014
- The --url option is now mandatory
- Do not switch to Stratum when using an HTTP proxy
- Fix scheduling policy change on Linux (clbr)
- Fix CPU affinity on FreeBSD (ache)
- Compatibility fixes for various platforms, including Solaris 8
and old versions of OS X
- A man page for minerd is now available
Version 2.3.2 - Jul 10, 2013
- Add optimizations for AVX2-capable x86-64 processors
- Ensure that the output stream is flushed after every log message
- Fix an undefined-behavior bug in the Stratum code
Version 2.3.1 - Jun 18, 2013
- Add a --cert option for specifying an SSL certificate (martinwguy)
- Fix a bug that only made SHA-256d mining work at difficulty 1
- Fix a couple of compatibility issues with some Stratum servers
Version 2.3 - Jun 12, 2013
- Add support for the Stratum mining protocol
- Automatically switch to Stratum if the mining server supports
the X-Stratum extension, unless --no-stratum is used
- Set CPU affinity on FreeBSD (lye)
- Fix a bug in libcurl initialization (martinwguy)
Version 2.2.3 - Aug 5, 2012
- Add optimized ARM NEON code for scrypt and SHA-256d
- Add a --benchmark option that allows offline testing
- Support for the X-Reject-Reason extension
Version 2.2.2 - Jun 7, 2012
- Various performance improvements for x86 and x86-64
- Optimize scrypt for ARMv5E and later processors
- Set the priority of miner threads to idle on Windows
- Add an option to start minerd as a daemon on POSIX systems
Version 2.2.1 - May 2, 2012
- Add optimized code for ARM processors
- Support for building on NetBSD and OpenBSD
- Various compatibility fixes for AIX (pontius)
Version 2.2 - Apr 2, 2012
- Add an optimized SHA-256d algorithm, with specialized code
for x86 and x86-64 and support for AVX and XOP instructions
- Slight performance increase for scrypt on x86 and x86-64
- The default timeout is now 270 seconds
Version 2.1.5 - Mar 7, 2012
- Add optimizations for AVX-capable x86-64 processors
- Assume HTTP if no protocol is specified for the mining server
- Fix MinGW compatibility issues and update build instructions
- Add support for building on Solaris using gcc (pontius)
Version 2.1.4 - Feb 28, 2012
- Implement 4-way SHA-256 on x86-64
- Add TCP keepalive to long polling connections
- Support HTTP and SOCKS proxies via the --proxy option
- Username and password are no longer mandatory
- Add a script that makes assembly code compatible with old versions
of the GNU assembler that do not support macros
Version 2.1.3 - Feb 12, 2012
- Smart handling of long polling failures: switch to short scan time
if long polling fails, and only try to reactivate it if the server
continues to advertise the feature in HTTP headers
- Add "X-Mining-Extensions: midstate" to HTTP headers (p2k)
- Add support for the "submitold" extension, used by p2pool
- It is now possible to specify username and password in the URL,
like this: http://username:password@host:port/
- Add a --version option, and clean up --help output
- Avoid division by zero when computing hash rates
- Handle empty responses properly (TimothyA)
- Eliminate the delay between starting threads
Version 2.1.2 - Jan 26, 2012
- Do not submit work that is known to be stale
- Allow miner threads to ask for new work if the current one is at least
45 seconds old and long polling is enabled
- Refresh work when long polling times out
- Fix minor speed regression
- Modify x86-64 code to make it compatible with older versions of binutils
Version 2.1.1 - Jan 20, 2012
- Handle network errors properly
- Make scantime retargeting more accurate
Version 2.1 - Jan 19, 2012
- Share the same work among all threads
- Do not ask for new work if the current one is not expired
- Do not discard the work returned by long polling
Version 2.0 - Jan 16, 2012
- Change default port to 9332 for Litecoin and remove default credentials
- Add 'scrypt' as the default algorithm and remove other algorithms (ArtForz)
- Optimize scrypt for x86 and x86-64
- Make scantime retargeting less granular (ArtForz)
- Test the whole hash instead of just looking at the high 32 bits
- Add configurable timeout, with a default of 180 seconds
- Add share summary output (inlikeflynn)
- Fix priority and CPU count detection on Windows
- Fix parameters -u and -p, and add short options -o and -O
Version 1.0.2 - Jun 13, 2011
- Linux x86_64 optimisations - Con Kolivas
- Optimise for x86_64 by default by using sse2_64 algo
- Detects CPUs and sets number of threads accordingly
- Uses CPU affinity for each thread where appropriate
- Sets scheduling policy to lowest possible
- Minor performance tweaks
Version 1.0.1 - May 14, 2011
- OSX support
Version 1.0 - May 9, 2011
- jansson 2.0 compatibility
- correct off-by-one in date (month) display output
- fix platform detection
- improve yasm configure bits
- support full URL, in X-Long-Polling header
Version 0.8.1 - March 22, 2011
- Make --user, --pass actually work
- Add User-Agent HTTP header to requests, so that server operators may
more easily identify the miner client.
- Fix minor bug in example JSON config file
Version 0.8 - March 21, 2011
- Support long polling: http://deepbit.net/longpolling.php
- Adjust max workload based on scantime (default 5 seconds,
or 60 seconds for longpoll)
- Standardize program output, and support syslog on Unix platforms
- Support --user/--pass options (and "user" and "pass" in config file),
as an alternative to the current --userpass
Version 0.7.2 - March 14, 2011
- Add port of ufasoft's sse2 assembly implementation (Linux only)
This is a substantial speed improvement on Intel CPUs.
- Move all JSON-RPC I/O to separate thread. This reduces the
number of HTTP connections from one-per-thread to one, reducing resource
usage on upstream bitcoind / pool server.
Version 0.7.1 - March 2, 2011
- Add support for JSON-format configuration file. See example
file example-cfg.json. Any long argument on the command line
may be stored in the config file.
- Timestamp each solution found
- Improve sha256_4way performance. NOTE: This optimization makes
the 'hash' debug-print output for sha256_way incorrect.
- Use __builtin_expect() intrinsic as compiler micro-optimization
- Build on Intel compiler
- HTTP library now follows HTTP redirects
Version 0.7 - February 12, 2011
- Re-use CURL object, thereby reusing DNS cache and HTTP connections
- Use bswap_32, if compiler intrinsic is not available
- Disable full target validation (as opposed to simply H==0) for now
Version 0.6.1 - February 4, 2011
- Fully validate "hash < target", rather than simply stopping our scan
if the high 32 bits are 00000000.
- Add --retry-pause, to set length of pause time between failure retries
- Display proof-of-work hash and target, if -D (debug mode) enabled
- Fix max-nonce auto-adjustment to actually work. This means if your
scan takes longer than 5 seconds (--scantime), the miner will slowly
reduce the number of hashes you work on, before fetching a new work unit.
Version 0.6 - January 29, 2011
- Fetch new work unit, if scanhash takes longer than 5 seconds (--scantime)
- BeeCee1's sha256 4way optimizations
- lfm's byte swap optimization (improves via, cryptopp)
- Fix non-working short options -q, -r
Version 0.5 - December 28, 2010
- Exit program, when all threads have exited
- Improve JSON-RPC failure diagnostics and resilience
- Add --quiet option, to disable hashmeter output.
Version 0.3.3 - December 27, 2010
- Critical fix for sha256_cryptopp 'cryptopp_asm' algo
Version 0.3.2 - December 23, 2010
- Critical fix for sha256_via
Version 0.3.1 - December 19, 2010
- Critical fix for sha256_via
- Retry JSON-RPC failures (see --retry, under "--help" output)
Version 0.3 - December 18, 2010
- Add crypto++ 32bit assembly implementation
- show version upon 'minerd --help'
- work around gcc 4.5.x bug that killed 4way performance
Version 0.2.2 - December 6, 2010
- VIA padlock implementation works now
- Minor build and runtime fixes
Version 0.2.1 - November 29, 2010
- avoid buffer overflow when submitting solutions
- add Crypto++ sha256 implementation (C only, ASM elided for now)
- minor internal optimizations and cleanups
Version 0.2 - November 27, 2010
- Add script for building a Windows installer
- improve hash performance (hashmeter) statistics
- add tcatm 4way sha256 implementation
- Add experimental VIA Padlock sha256 implementation
Version 0.1.2 - November 26, 2010
- many small cleanups and micro-optimizations
- build win32 exe using mingw
- RPC URL, username/password become command line arguments
- remove unused OpenSSL dependency
Version 0.1.1 - November 24, 2010
- Do not build sha256_generic module separately from cpuminer.
Version 0.1 - November 24, 2010
- Initial release.

333
README.md
View File

@@ -1,262 +1,117 @@
This project is forked by Jay D Dee. cpuminer-opt is a fork of cpuminer-multi by TPruvot with optimizations
imported from other miners developed by Lucas Jones, djm34, Wolf0, pooler,
Jeff garzik, ig0tik3d, elmad, palmd, and Optiminer, with additional
optimizations by Jay D Dee.
Updated for v3.3.2 Windows support. All of the code is believed to be open and free. If anyone has a
claim to any of it post your case in the cpuminer-opt Bitcoin Talk forum
or by email.
Building on linux prerequisites: https://bitcointalk.org/index.php?topic=1326803.0
It is assumed users know how to install packages on their system and mailto://jayddee246@gmail.com
be able to compile standard source packages. This is basic Linux and
beyond the scope of cpuminer-opt.
Make sure you have the basic development packages installed. See file RELEASE_NOTES for change log and compile instructions.
Here is a good start:
http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu Supported Algorithms
--------------------
Install any additional dependencies needed by cpuminer-opt. The list below argon2
are some of the ones that may not be in the default install and need to axiom Shabal-256 MemoHash
be installed manually. There may be others, read the error messages they bastion
will give a clue as to the missing package. blake Blake-256 (SFR)
blakecoin blake256r8
blake2s Blake-2 S
bmw BMW 256
c11 Flax
cryptolight Cryptonight-light
cryptonight cryptonote, Monero (XMR)
decred
drop Dropcoin
fresh Fresh
groestl groestl
heavy Heavy
hmq1725 Espers
hodl Hodlcoin
keccak Keccak
lbry LBC, LBRY Credits
luffa Luffa
lyra2re lyra2
lyra2rev2 lyrav2
lyra2z Zcoin (XZC)
lyra2zoin Zoin (ZOI)
m7m Magi (XMG)
myr-gr Myriad-Groestl
neoscrypt NeoScrypt(128, 2, 1)
nist5 Nist5
pluck Pluck:128 (Supcoin)
pentablake Pentablake
quark Quark
qubit Qubit
scrypt scrypt(1024, 1, 1) (default)
scrypt:N scrypt(N, 1, 1)
scryptjane:nf
sha256d SHA-256d
shavite3 Shavite3
skein Skein+Sha (Skeincoin)
skein2 Double Skein (Woodcoin)
vanilla blake256r8vnl (VCash)
veltor
whirlpool
whirlpoolx
x11 X11
x11evo Revolvercoin
x11gost sib (SibCoin)
x13 X13
x14 X14
x15 X15
x17
xevan Bitsend
yescrypt
zr5 Ziftr
The following command should install everything you need on Debian based Requirements
packages: ------------
sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev automake 1. A x86_64 architecture CPU with a minimum of SSE2 support. This includes
Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
optimizations a CPU with AES_NI is required. This includes Intel Westbridge
and newer and AMD equivalents. Further optimizations are available on some
algorithms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
Building on Linux, see below for Windows. Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
performance.
Dependencies 2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
Centos are known to work and have all dependencies in their repositories.
Others may work but may require more effort. 64 bit Windows OS is now supported
with mingw_w64 and msys.
build-essential (for Ubuntu, Development Tools package group on Fedora) 3. Stratum pool, cpuminer-opt only supports stratum minning.
automake
libjansson-dev
libgmp-dev
libcurl4-openssl-dev
libssl-dev
pthreads
zlib
tar xvzf [file.tar.gz] Errata
cd [file] ------
Run build.sh to build on Linux or execute the following commands. cpuminer-opt does not work mining Decred algo at Nicehash and produces
only "invalid extranonce2 size" rejects. It works at Zpool.
./autogen.sh Benchmark testing does not work for x11evo.
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
make
Start mining. Bugs
----
./cpuminer -a algo ... Users are encouraged to post their bug reports on the Bitcoin Talk
forum at:
Building on Windows prerequisites: https://bitcointalk.org/index.php?topic=1326803.0
msys
mingw_w64
Visual C++ redistributable 2008 X64
openssl, not sure about this
Install msys and mingw_w64, only needed once.
Unpack msys into C:\msys or your preferred directory.
Install mingw_w64 from win-builds.
Follow instructions, check "msys or cygwin" and "x86_64" and accept default
existing msys installation.
Open a msys shell by double clicking on msys.bat.
Note that msys shell uses linux syntax for file specifications, "C:\" is
mounted at "/c/".
Add mingw bin directory to PATH variable
PATH="/c/msys/opt/windows_64/bin/:$PATH"
Installation complete, compile cpuminer-opt
Unpack cpuminer-opt source files using tar from msys shell, or using 7zip
or similar Windows program.
In msys shell cd to miner directory.
cd /c/path/to/cpuminer-opt
Run winbuild.sh to build on Windows or execute the following commands.
./autogen.sh
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
make
The following tips may be useful for older AMD CPUs.
Some users with AMD CPUs without AES_NI have reported problems compiling
with build.sh or "-march=native". Problems have included compile errors
and poor performance. These users are recommended to compile manually
specifying "-march=btver1" on the configure command line.
Support for even older x86_64 without AES_NI or SSE2 is not available.
cpuminer-multi by TPruvot supports this architecture.
The rest of this file is taken from cpuminer-multi.
----------------
CPUMiner-Multi
==============
[![Build Status](https://travis-ci.org/tpruvot/cpuminer-multi.svg)](https://travis-ci.org/tpruvot/cpuminer-multi)
This is a multi-threaded CPU miner,
fork of [pooler](//github.com/pooler)'s cpuminer (see AUTHORS for list of contributors).
#### Table of contents
* [Algorithms](#algorithms)
* [Dependencies](#dependencies)
* [Download](#download)
* [Build](#build)
* [Usage instructions](#usage-instructions)
* [Donations](#donations)
* [Credits](#credits)
* [License](#license)
Algorithms
==========
#### Currently supported
*__scrypt__ (Litecoin, Dogecoin, Feathercoin, ...)
*__scrypt:N__
*__sha256d__ (Bitcoin, Freicoin, Peercoin/PPCoin, Terracoin, ...)
*__axiom__ (Axiom Shabal-256 based MemoHash)
*__blake__ (Saffron [SFR] Blake-256)
*__bmw__ (Midnight [MDT] BMW-256)
*__cryptonight__ (Bytecoin [BCN], Monero)
*__cryptonight-light__ (Aeon)
*__dmd-gr__ (Diamond-Groestl)
*__fresh__ (FreshCoin)
*__groestl__ (Groestlcoin)
*__lyra2RE__ (Lyrabar, Cryptocoin)
*__lyra2REv2__ (VertCoin [VTC])
*__myr-gr__ (Myriad-Groestl)
*__neoscrypt__ (Feathercoin)
*__nist5__ (MistCoin [MIC], TalkCoin [TAC], ...)
*__pentablake__ (Joincoin)
*__pluck__ (Supcoin [SUP])
*__quark__ (Quarkcoin)
*__qubit__ (MyriadCoin [MYR])
*__skein__ (Skeincoin, Myriadcoin, Xedoscoin, ...)
*__skein2__ (Woodcoin)
*__s3__ (OneCoin)
*__x11__ (Darkcoin [DRK], Hirocoin, Limecoin, ...)
*__x13__ (Sherlockcoin, [ACE], [B2B], [GRC], [XHC], ...)
*__x14__ (X14, Webcoin [WEB])
*__x15__ (RadianceCoin [RCE])
*__zr5__ (Ziftrcoin [ZRC])
#### Implemented, but untested
* ? blake2s
* ? hefty1 (Heavycoin)
* ? keccak (Maxcoin HelixCoin, CryptoMeth, Galleon, 365coin, Slothcoin, BitcointalkCoin)
* ? luffa (Joincoin, Doomcoin)
* ? shavite3 (INKcoin)
* ? sib X11 + gost (SibCoin)
#### Planned support for
* *scrypt-jane* (YaCoin, CopperBars, Pennies, Tickets, etc..)
Dependencies
============
* libcurl http://curl.haxx.se/libcurl/
* jansson http://www.digip.org/jansson/ (jansson source is included in-tree)
* openssl libcrypto https://www.openssl.org/
* pthreads
* zlib (for curl/ssl)
Download
========
* Windows releases: https://github.com/tpruvot/cpuminer-multi/releases
* Git tree: https://github.com/tpruvot/cpuminer-multi
* Clone with `git clone https://github.com/tpruvot/cpuminer-multi`
Build
=====
#### Basic *nix build instructions:
* just use ./build.sh
_OR_
* ./autogen.sh # only needed if building from git repo
* ./nomacro.pl # only needed if building on Mac OS X or with Clang
* ./configure CFLAGS="-O3 -march=native" --with-crypto --with-curl
* # Use -march=native if building for a single machine
* make
#### Notes for AIX users:
* To build a 64-bit binary, export OBJECT_MODE=64
* GNU-style long options are not supported, but are accessible via configuration file
#### Basic Windows build with Visual Studio 2013
* All the required .lib files are now included in tree (windows only)
* AVX enabled by default for x64 platform (AVX2 and XOP could also be used)
#### Basic Windows build instructions, using MinGW64:
* Install MinGW64 and the MSYS Developer Tool Kit (http://www.mingw.org/)
* Make sure you have mstcpip.h in MinGW\include
* install pthreads-w64
* Install libcurl devel (http://curl.haxx.se/download.html)
* Make sure you have libcurl.m4 in MinGW\share\aclocal
* Make sure you have curl-config in MinGW\bin
* Install openssl devel (https://www.openssl.org/related/binaries.html)
* In the MSYS shell, run:
* for 64bit, you can use ./mingw64.sh else :
./autogen.sh # only needed if building from git repo
* LIBCURL="-lcurldll" ./configure CFLAGS="*-march=native*"
* # Use -march=native if building for a single machine
* make
#### Architecture-specific notes:
* ARM:
* No runtime CPU detection. The miner can take advantage of some instructions specific to ARMv5E and later processors, but the decision whether to use them is made at compile time, based on compiler-defined macros.
* To use NEON instructions, add "-mfpu=neon" to CFLAGS.
* x86:
* The miner checks for SSE2 instructions support at runtime, and uses them if they are available.
* x86-64:
* The miner can take advantage of AVX, AVX2 and XOP instructions, but only if both the CPU and the operating system support them.
* Linux supports AVX starting from kernel version 2.6.30.
* FreeBSD supports AVX starting with 9.1-RELEASE.
* Mac OS X added AVX support in the 10.6.8 update.
* Windows supports AVX starting from Windows 7 SP1 and Windows Server 2008 R2 SP1.
* The configure script outputs a warning if the assembler doesn't support some instruction sets. In that case, the miner can still be built, but unavailable optimizations are left off.
Usage instructions
==================
Run "cpuminer --help" to see options.
### Connecting through a proxy
Use the --proxy option.
To use a SOCKS proxy, add a socks4:// or socks5:// prefix to the proxy host
Protocols socks4a and socks5h, allowing remote name resolving, are also available since libcurl 7.18.0.
If no protocol is specified, the proxy is assumed to be a HTTP proxy.
When the --proxy option is not used, the program honors the http_proxy and all_proxy environment variables.
Donations Donations
========= ---------
Donations for the work done in this fork are accepted :
Tanguy Pruvot : I do not do this for money but I have a donation address if users
* BTC: `1FhDPLPpw18X4srecguG3MxJYe4a1JsZnd` are so inclined.
* ZRC: `ZX6LmrCwphNgitxvDnf8TX6Tsegfxpeozx`
Lucas Jones : bitcoin:12tdvfF7KmAsihBXQXynT6E6th2c2pByTT?label=donations
* MRO: `472haywQKoxFzf7asaQ4XKBc2foAY4ezk8HiN63ifW4iAbJiLnfmJfhHSR9XmVKw2WYPnszJV9MEHj9Z5WMK9VCNHaGLDmJ`
* BTC: `139QWoktddChHsZMWZFxmBva4FM96X2dhE`
Credits Happy mining!
=======
CPUMiner-multi was forked from pooler's CPUMiner, and has been started by Lucas Jones.
* [tpruvot](https://github.com/tpruvot) added all the recent features and newer algorithms
* [Wolf9466](https://github.com/wolf9466) helped with Intel AES-NI support for CryptoNight
License
=======
GPLv2. See COPYING for details.

View File

@@ -1,85 +0,0 @@
cpuminer-opt now supports over 40 algorithms on CPUs with at least SSE2
capabilities including Intel Core2, Nehalem and AMD equivalent. See the
performance chart below for details.
In addition 19 algorithms have optimizations to take advantage of
CPUs with AES_NI for even greater performance, including the Intel
Westbridge and newer and AMD equivalent. See the performance
comparison below.
New in 3.4.12
- lyra2z (zcoin) modified for blocks after 8192
- fixed scryptjane to support various N factors
Users with non-SSE2 CPUs or who want to mine algos not supported by
cpuminer-opt may find cpuminer-multi by TPruvot useful.
Chart out of date, will be removed.
The performance chart below is for an Intel i7-6700K @ 4 GHz, 16 GB mem.
Normalization rates have been added to the chart to help with profit
switching pools. Reference algo x11 = 1.
Due to the peculiarities of some algorithms their performance on other CPU
architectures may not scale equally. Their normalization rates will also
differ from those listed below. YMMV.
Normalized profitability = algo profitability * norm rate
AES-AVX SSE2(1) norm rate(5)
------- ------- ---------
x11 780 K 525 K 1
x13 392 298 0.50
x14 370 271 0.48
x15 341 270 0.45
x17 317 248 0.43
x11gost 562 392 0.72
x11evo 590 387 0.78
quark 1195 924 1.61
qubit 1182 765 1.45
nist5 2000 1592 3.37
zr5 850 650 1.15
c11 784 475 0.99
myr-gr 1572 1560 2.12
hmq1725 214 161 0.29
m7m 121 77.4 0.155
lyra2re 1380 900 1.76
lyra2rev2 1350 980 1.73
cryptonight 290 H 165 H 0.00039
cryptolight 685 ? 0.00093
hodl 600 200 0.00081
lbry (4) 2620 3.53
neoscrypt (4) 32 K 0.043
argon2 (4) 33.7 0.045
groestl (4) 931 1.26
skein (4) 5747 7.77
skein2 (4) 8675 11.7
pentablake (4) 3960 5.35
keccak (4) 7790 10.5
scrypt (4) 113 0.153
sha256d (4) 62.5 0.084
veltor (4) 1017 1.30
blake (4) 22.4 M 30.4
blake2s (4) 19.0 25.7
vanilla (4) 33.0 44.6
blakecoin (4) 33.9 45.8
decred (4) 22.6 30.5
axiom (4) 72 H 0.000098
yescrypt (4) 3760 0.0051
scryptjane (4) 250 0.00034
pluck(2) (4) 1925 0.0026
drop(2) (4) 934 K 1.26
fresh(2) (4) 528 0.71
whirlpool(2) (4) 1290 1.74
whirlpoolx(2) (4) 5110 6.9
Footnotes:
(1) SSE2 rates are simulated in software (-march=core2) on an i7.
(2) Benchmark tested only
(3) CPU architecture not supported for algo. It won't work.
(4) AES_NI Optimization not available for CPU architecture. Uses SSE2, slower.
(5) Normalised profitability = algo profitability * norm rate, x11 = 1
(6) Not supported on Windows

View File

@@ -1,81 +1,204 @@
Change Log
----------
cpuminer-opt-3.1 release notes v3.5.0
--------------i----------------
cpuminer-opt combines the best of minerd (x11), cp3u (quark) and Fixed blakecoin and vanilla increasing rejects with number of threads.
cpuminer-multi (multi-algo support plus non-kernel related Removed support for SSE2 Groestl functions. SSE2 groestl remains available
enhancements). Additional credits to Lucas Jones, elmad, palmd, in v3.4.12 and the legacy branch.
djm34, pooler, Jeff Garzik, Wolf0 and probably others. It is no longer necessary to specify stratum+tcp:// in the url, it is assumed
and is the only supported protocol.
The core of cpuminer-opt remains cpuminer-multi and is the base for v3.4.12
this fork.
All of the code is believed to be open and free. If anyone has a lyra2z (zcoin) modified for blocks after 8192
claim to any of it post your case in the Bitcoin Talk forum, fixed scryptjane to support various N factors
link below.
Features v3.4.11
--------
V3.1 introduces a new mining engine called algo_gate. This feature groestl algo AES optimized +200%
is not visible to the users except for the additional 5% performance myr-gr algo AES optimized +100%
increase in all algos. This feature is of interest mostly to
developers.
cpuminer provides accelerated hashing on AES-NI capable CPUs in v3.4.10
x11, x13, x14, x15, quark & qubit algorithms. It also currently
provides acceleration for SSE2 capable CPUs on quark and qubit
algorithms only. Other algorithms are available but unchanged from
cpuminer-multi-1.2pre and in various states of functionality.
V3.0 provides improved hash rates for many algos. See the
release announcement for details.
Requirements xevan AES optimized +35%
------------
A 64 bit CPU with SSE2 support and any of the popular 64 bit v3.4.9
Linux distributions. Standard development tools, libcurl-devel,
the preferred SSL development package of your distribution.
Limitations fixed zr5, broken in v3.4.8
----------- added xevan algo (Bitsend, BSD) with 10% improvement
added lyra2zoin (Zoin, ZOI) fully optimized but YMMV
v3.0 is source code only that can be compiled on Linux. v3.4.8
Windows support is not yet available, but planned.
Compiling added zcoin support, optimized for AVX2 but no increase in performance
--------- fixed API display of diff for cryptonight
--show-diff is now the default, use "--hide-diff" to disable
cleaned up some cpuminer-multi artifacts
After unpacking the tarball change into the cpuminer directory and v3.4.7
execute these commands. Note that O3 is actually the upper case
letter O.
./autogen.sh fixed benchmark, except for x11evo
./configure CFLAGS="-O3 -march=native" --with-crypto --with-curl added CPU temperature to share submission report (Linux only)
v3.4.6
For users:
- cryptolight algo is now supported with AES optimizations
- display format changed for share submissions
- colour keyed "Accepted" or "Rejected" status.
- reject count and rate displayed when share is rejected.
For developers:
- code restructuring for detecting new work
- cleaned up detection and handling of new work
- removed call to stratum_gen_work from miner_thread.
- eliminated gen_work_now gate function.
- renamed gate function init_nonce to get_new_work.
- renamed gate function alloc_scratchbuf to miner_thread_init,
removed all scracthbuf references from miner_thread and moved
implementation to the local algo files of those algos that need it.
- moved most gate targets from algo-gate.c to cpu-miner.c removing
most mining related code from algo-gate-api.c.
v3.4.5
fixed stale share rejects mining cryptonight at Nicehash
fixed compile error on Westmere CPUs
v3.4.4
fixed compile errors on Westmere CPUs, this is an interim fix that
will compile without AES on Westmere
added support for cryptonight at Nicehash, some rejects may be produced
at Nicehash only.
v3.4.3
imported optimized m7m, +42%
v3.4.2
added veltor algo
tweaked lyra2 AVX/AVX2 code for small improvement.
v3.4.1
big AVX2 optimizations for lyra2 +35%, lyra2v2 +11%, AVX also faster
fixed hmq1725
v3.4.0
fixed Windows compile error introduced in v3.3.9
fixed x11gost, broken in v3.3.7
AVX2 optimizations improving many algos:
- Lyra2RE +3%
- Lyra2REv2 +19%
- x11gost (sib) +6%
- x11evo +2.4%
- c11 +6.9%
- x11 +5%
- x13 +5%
- x14 +3.6%
- x15 +2.4%
- x17 +2.8%
- qubit +8.4%
Compile Instructions
--------------------
Building on linux prerequisites:
It is assumed users know how to install packages on their system and
be able to compile standard source packages. This is basic Linux and
beyond the scope of cpuminer-opt.
Make sure you have the basic development packages installed.
Here is a good start:
http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu
Install any additional dependencies needed by cpuminer-opt. The list below
are some of the ones that may not be in the default install and need to
be installed manually. There may be others, read the error messages they
will give a clue as to the missing package.
The following command should install everything you need on Debian based
packages:
sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev automake
Building on Linux, see below for Windows.
Dependencies
build-essential (for Ubuntu, Development Tools package group on Fedora)
automake
libjansson-dev
libgmp-dev
libcurl4-openssl-dev
libssl-dev
pthreads
zlib
tar xvzf [file.tar.gz]
cd [file]
Run build.sh to build on Linux or execute the following commands.
./autogen.sh
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
make make
libcurl-devel and an development packages are required to be Start mining.
installed to build this application and are available in most
Linux repositories.
To compile on older CPUs without AES_NI support use the following ./cpuminer -a algo ...
CFLAGS options: "-O3 -march=native -DNO_AES_NI"
Bugs Building on Windows prerequisites:
----
Users are encouraged to post their bug reports on the Bitcoin Talk msys
forum at: mingw_w64
Visual C++ redistributable 2008 X64
openssl, not sure about this
https://bitcointalk.org/index.php?topic=1326803.0 Install msys and mingw_w64, only needed once.
Donations Unpack msys into C:\msys or your preferred directory.
---------
I do not do this for money but I have a donation address if users Install mingw__w64 from win-builds.
are so inclined. Follow instructions, check "msys or cygwin" and "x86_64" and accept default
existing msys installation.
bitcoin:12tdvfF7KmAsihBXQXynT6E6th2c2pByTT?label=donations Open a msys shell by double clicking on msys.bat.
Note that msys shell uses linux syntax for file specifications, "C:\" is
mounted at "/c/".
Happy mining! Add mingw bin directory to PATH variable
PATH="/c/msys/opt/windows_64/bin/:$PATH"
Installation complete, compile cpuminer-opt
Unpack cpuminer-opt source files using tar from msys shell, or using 7zip
or similar Windows program.
In msys shell cd to miner directory.
cd /c/path/to/cpuminer-opt
Run winbuild.sh to build on Windows or execute the following commands.
./autogen.sh
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
make
The following tips may be useful for older AMD CPUs.
Some users with AMD CPUs without AES_NI have reported problems compiling
with build.sh or "-march=native". Problems have included compile errors
and poor performance. These users are recommended to compile manually
specifying "-march=btver1" on the configure command line.
Support for even older x86_64 without AES_NI or SSE2 is not available.

View File

@@ -12,40 +12,36 @@ void blakecoin_close(void *cc, void *dst);
#include <memory.h> #include <memory.h>
#include <openssl/sha.h> #include <openssl/sha.h>
/* Move init out of loop, so init once externally, // context management is staged for efficiency.
* and then use one single memcpy */ // 1. global initial ctx cached on startup
static sph_blake256_context blake_mid; // 2. per-thread midstate ctx cache refreshed every scan
static bool ctx_midstate_done = false; // 3. local ctx for final hash calculation
static void init_blake_hash(void) static sph_blake256_context blake_init_ctx;
static __thread sph_blake256_context blake_mid_ctx;
static void blake_midstate_init( const void* input )
{ {
blakecoin_init(&blake_mid); // copy cached initial state
ctx_midstate_done = true; memcpy( &blake_mid_ctx, &blake_init_ctx, sizeof blake_mid_ctx );
blakecoin( &blake_mid_ctx, input, 64 );
} }
void blakecoinhash(void *state, const void *input) void blakecoinhash( void *state, const void *input )
{ {
sph_blake256_context ctx; sph_blake256_context ctx;
uint8_t hash[64]; uint8_t hash[64];
uint8_t *ending = (uint8_t*) input; uint8_t *ending = (uint8_t*) input + 64;
ending += 64;
// do one memcopy to get a fresh context // copy cached midstate
if (!ctx_midstate_done) { memcpy( &ctx, &blake_mid_ctx, sizeof ctx );
init_blake_hash(); blakecoin( &ctx, ending, 16 );
blakecoin(&blake_mid, input, 64); blakecoin_close( &ctx, hash );
} memcpy( state, hash, 32 );
memcpy(&ctx, &blake_mid, sizeof(blake_mid));
blakecoin(&ctx, ending, 16);
blakecoin_close(&ctx, hash);
memcpy(state, hash, 32);
} }
int scanhash_blakecoin(int thr_id, struct work *work, uint32_t max_nonce, int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done) uint64_t *hashes_done )
{ {
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
@@ -57,16 +53,14 @@ int scanhash_blakecoin(int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = first_nonce; uint32_t n = first_nonce;
ctx_midstate_done = false;
if (opt_benchmark) if (opt_benchmark)
HTarget = 0x7f; HTarget = 0x7f;
// we need big endian data... // we need big endian data...
// be32enc_array( endiandata, pdata, 19 );
for (int kk=0; kk < 19; kk++) for (int kk=0; kk < 19; kk++)
be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]); be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]);
blake_midstate_init( endiandata );
#ifdef DEBUG_ALGO #ifdef DEBUG_ALGO
applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]); applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]);
@@ -117,6 +111,7 @@ bool register_vanilla_algo( algo_gate_t* gate )
gate->hash = (void*)&blakecoinhash; gate->hash = (void*)&blakecoinhash;
gate->hash_alt = (void*)&blakecoinhash; gate->hash_alt = (void*)&blakecoinhash;
gate->get_max64 = (void*)&blakecoin_get_max64; gate->get_max64 = (void*)&blakecoin_get_max64;
blakecoin_init( &blake_init_ctx );
return true; return true;
} }

View File

@@ -317,7 +317,6 @@ static const sph_u64 blkIV512[8] = {
#define COMPRESS64 do { \ #define COMPRESS64 do { \
int r; \
int b=0; \ int b=0; \
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; \ sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; \
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; \ sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; \

View File

@@ -1,133 +0,0 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
LICENSE TERMS
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
*/
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
#endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( _LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if !defined(PLATFORM_BYTE_ORDER)
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#else
# error Please edit lines 126 or 128 in brg_endian.h to set the platform byte order
#endif
#endif
#endif

View File

@@ -1,231 +0,0 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
(a few lines added by Soeren S. Thomsen, October 2008)
LICENSE TERMS
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
The unsigned integer types defined here are of the form uint_<nn>t where
<nn> is the length of the type; for example, the unsigned 32-bit type is
'uint_32t'. These are NOT the same as the 'C99 integer types' that are
defined in the inttypes.h and stdint.h headers since attempts to use these
types have shown that support for them is still highly variable. However,
since the latter are of the form uint<nn>_t, a regular expression search
and replace (in VC++ search on 'uint_{:z}t' and replace with 'uint\1_t')
can be used to convert the types used here to the C99 standard types.
*/
#ifndef _BRG_TYPES_H
#define _BRG_TYPES_H
#if defined(__cplusplus)
extern "C" {
#endif
#include <limits.h>
#if defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
# include <stddef.h>
# define ptrint_t intptr_t
#elif defined( __GNUC__ ) && ( __GNUC__ >= 3 )
# include <stdint.h>
# define ptrint_t intptr_t
#else
# define ptrint_t int
#endif
#ifndef BRG_UI8
# define BRG_UI8
# if UCHAR_MAX == 255u
typedef unsigned char uint_8t;
# else
# error Please define uint_8t as an 8-bit unsigned integer type in brg_types.h
# endif
#endif
#ifndef BRG_UI16
# define BRG_UI16
# if USHRT_MAX == 65535u
typedef unsigned short uint_16t;
# else
# error Please define uint_16t as a 16-bit unsigned short type in brg_types.h
# endif
#endif
#ifndef BRG_UI32
# define BRG_UI32
# if UINT_MAX == 4294967295u
# define li_32(h) 0x##h##u
typedef unsigned int uint_32t;
# elif ULONG_MAX == 4294967295u
# define li_32(h) 0x##h##ul
typedef unsigned long uint_32t;
# elif defined( _CRAY )
# error This code needs 32-bit data types, which Cray machines do not provide
# else
# error Please define uint_32t as a 32-bit unsigned integer type in brg_types.h
# endif
#endif
#ifndef BRG_UI64
# if defined( __BORLANDC__ ) && !defined( __MSDOS__ )
# define BRG_UI64
# define li_64(h) 0x##h##ui64
typedef unsigned __int64 uint_64t;
# elif defined( _MSC_VER ) && ( _MSC_VER < 1300 ) /* 1300 == VC++ 7.0 */
# define BRG_UI64
# define li_64(h) 0x##h##ui64
typedef unsigned __int64 uint_64t;
# elif defined( __sun ) && defined( ULONG_MAX ) && ULONG_MAX == 0xfffffffful
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# elif defined( __MVS__ )
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned int long long uint_64t;
# elif defined( UINT_MAX ) && UINT_MAX > 4294967295u
# if UINT_MAX == 18446744073709551615u
# define BRG_UI64
# define li_64(h) 0x##h##u
typedef unsigned int uint_64t;
# endif
# elif defined( ULONG_MAX ) && ULONG_MAX > 4294967295u
# if ULONG_MAX == 18446744073709551615ul
# define BRG_UI64
# define li_64(h) 0x##h##ul
typedef unsigned long uint_64t;
# endif
# elif defined( ULLONG_MAX ) && ULLONG_MAX > 4294967295u
# if ULLONG_MAX == 18446744073709551615ull
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# endif
# elif defined( ULONG_LONG_MAX ) && ULONG_LONG_MAX > 4294967295u
# if ULONG_LONG_MAX == 18446744073709551615ull
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# endif
# endif
#endif
#if !defined( BRG_UI64 )
# if defined( NEED_UINT_64T )
# error Please define uint_64t as an unsigned 64 bit type in brg_types.h
# endif
#endif
#ifndef RETURN_VALUES
# define RETURN_VALUES
# if defined( DLL_EXPORT )
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
# define VOID_RETURN __declspec( dllexport ) void __stdcall
# define INT_RETURN __declspec( dllexport ) int __stdcall
# elif defined( __GNUC__ )
# define VOID_RETURN __declspec( __dllexport__ ) void
# define INT_RETURN __declspec( __dllexport__ ) int
# else
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
# endif
# elif defined( DLL_IMPORT )
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
# define VOID_RETURN __declspec( dllimport ) void __stdcall
# define INT_RETURN __declspec( dllimport ) int __stdcall
# elif defined( __GNUC__ )
# define VOID_RETURN __declspec( __dllimport__ ) void
# define INT_RETURN __declspec( __dllimport__ ) int
# else
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
# endif
# elif defined( __WATCOMC__ )
# define VOID_RETURN void __cdecl
# define INT_RETURN int __cdecl
# else
# define VOID_RETURN void
# define INT_RETURN int
# endif
#endif
/* These defines are used to detect and set the memory alignment of pointers.
Note that offsets are in bytes.
ALIGN_OFFSET(x,n) return the positive or zero offset of
the memory addressed by the pointer 'x'
from an address that is aligned on an
'n' byte boundary ('n' is a power of 2)
ALIGN_FLOOR(x,n) return a pointer that points to memory
that is aligned on an 'n' byte boundary
and is not higher than the memory address
pointed to by 'x' ('n' is a power of 2)
ALIGN_CEIL(x,n) return a pointer that points to memory
that is aligned on an 'n' byte boundary
and is not lower than the memory address
pointed to by 'x' ('n' is a power of 2)
*/
#define ALIGN_OFFSET(x,n) (((ptrint_t)(x)) & ((n) - 1))
#define ALIGN_FLOOR(x,n) ((uint_8t*)(x) - ( ((ptrint_t)(x)) & ((n) - 1)))
#define ALIGN_CEIL(x,n) ((uint_8t*)(x) + (-((ptrint_t)(x)) & ((n) - 1)))
/* These defines are used to declare buffers in a way that allows
faster operations on longer variables to be used. In all these
defines 'size' must be a power of 2 and >= 8. NOTE that the
buffer size is in bytes but the type length is in bits
UNIT_TYPEDEF(x,size) declares a variable 'x' of length
'size' bits
BUFR_TYPEDEF(x,size,bsize) declares a buffer 'x' of length 'bsize'
bytes defined as an array of variables
each of 'size' bits (bsize must be a
multiple of size / 8)
UNIT_CAST(x,size) casts a variable to a type of
length 'size' bits
UPTR_CAST(x,size) casts a pointer to a pointer to a
variable of length 'size' bits
*/
#define UI_TYPE(size) uint_##size##t
#define UNIT_TYPEDEF(x,size) typedef UI_TYPE(size) x
#define BUFR_TYPEDEF(x,size,bsize) typedef UI_TYPE(size) x[bsize / (size >> 3)]
#define UNIT_CAST(x,size) ((UI_TYPE(size) )(x))
#define UPTR_CAST(x,size) ((UI_TYPE(size)*)(x))
/* Added by Soeren S. Thomsen (begin) */
#define u8 uint_8t
#define u32 uint_32t
#define u64 uint_64t
/* (end) */
#if defined(__cplusplus)
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,956 +0,0 @@
/* groestl-intr-vperm.h Aug 2011
*
* Groestl implementation with intrinsics using ssse3 instructions.
* Author: Günther A. Roland, Martin Schläffer
*
* Based on the vperm and aes_ni implementations of the hash function Groestl
* by Cagdas Calik <ccalik@metu.edu.tr> http://www.metu.edu.tr/~ccalik/
* Institute of Applied Mathematics, Middle East Technical University, Turkey
*
* This code is placed in the public domain
*/
#include <tmmintrin.h>
#include "grsi.h"
/*define data alignment for different C compilers*/
#if defined(__GNUC__)
#define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
#else
#define DATA_ALIGN16(x) __declspec(align(16)) x
#endif
//#if defined(DECLARE_GLOBAL)
#if 1
#define GLOBAL
#else
#define GLOBAL extern
#endif
//#if defined(DECLARE_IFUN)
#if 1
#define IFUN
#else
#define IFUN extern
#endif
/* global constants */
//GLOBAL __m128i grsiROUND_CONST_Lx;
//GLOBAL __m128i grsiROUND_CONST_L0[grsiROUNDS512];
//GLOBAL __m128i grsiROUND_CONST_L7[grsiROUNDS512];
DATA_ALIGN16(int32_t grsiSUBSH_MASK_short[8*4]) = {
0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
0x04030201, 0x08070605, 0x0c0b0a09, 0x000f0e0d,
0x05040302, 0x09080706, 0x0d0c0b0a, 0x01000f0e,
0x06050403, 0x0a090807, 0x0e0d0c0b, 0x0201000f,
0x07060504, 0x0b0a0908, 0x0f0e0d0c, 0x03020100,
0x08070605, 0x0c0b0a09, 0x000f0e0d, 0x04030201,
0x09080706, 0x0d0c0b0a, 0x01000f0e, 0x05040302,
0x0e0d0c0b, 0x0201000f, 0x06050403, 0x0a090807
};
GLOBAL __m128i *grsiSUBSH_MASK = grsiSUBSH_MASK_short;
GLOBAL __m128i grsiALL_0F = {0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f};
GLOBAL __m128i grsiALL_1B = {0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b};
GLOBAL __m128i grsiALL_FF = {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff};
/* global unknown */
GLOBAL __m128i grsiVPERM_OPT[2];
GLOBAL __m128i grsiVPERM_INV[2];
GLOBAL __m128i grsiVPERM_SB1[2];
GLOBAL __m128i grsiVPERM_SB2[2];
GLOBAL __m128i grsiVPERM_SB4[2];
GLOBAL __m128i grsiVPERM_SBO[2];
/* state vars */
GLOBAL __m128i grsiTRANSP_MASK;
GLOBAL __m128i grsiVPERM_IPT[2];
GLOBAL __m128i grsiALL_15;
GLOBAL __m128i grsiALL_63;
GLOBAL __m128i grsiROUND_CONST_P[grsiROUNDS1024];
GLOBAL __m128i grsiROUND_CONST_Q[grsiROUNDS1024];
#define grsitos(a) #a
#define grsitostr(a) grsitos(a)
/*
grsiALL_1B = _mm_set_epi32(0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b);\
grsiALL_63 = _mm_set_epi32(0x63636363, 0x63636363, 0x63636363, 0x63636363);\
*/
#define grsiSET_SHARED_CONSTANTS(){\
grsiTRANSP_MASK = _mm_set_epi32(0x0f070b03, 0x0e060a02, 0x0d050901, 0x0c040800);\
grsiALL_0F = _mm_set_epi32(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f);\
grsiALL_15 = _mm_set_epi32(0x15151515, 0x15151515, 0x15151515, 0x15151515);\
\
grsiVPERM_IPT[0] = _mm_set_epi32(0xCD80B1FC, 0xB0FDCC81, 0x4C01307D, 0x317C4D00);\
grsiVPERM_IPT[1] = _mm_set_epi32(0xCABAE090, 0x52227808, 0xC2B2E898, 0x5A2A7000);\
grsiVPERM_OPT[0] = _mm_set_epi32(0xE10D5DB1, 0xB05C0CE0, 0x01EDBD51, 0x50BCEC00);\
grsiVPERM_OPT[1] = _mm_set_epi32(0xF7974121, 0xDEBE6808, 0xFF9F4929, 0xD6B66000);\
grsiVPERM_INV[0] = _mm_set_epi32(0x030D0E0C, 0x02050809, 0x01040A06, 0x0F0B0780);\
grsiVPERM_INV[1] = _mm_set_epi32(0x04070309, 0x0A0B0C02, 0x0E05060F, 0x0D080180);\
grsiVPERM_SB1[0] = _mm_set_epi32(0x3BF7CCC1, 0x0D2ED9EF, 0x3618D415, 0xFAE22300);\
grsiVPERM_SB1[1] = _mm_set_epi32(0xA5DF7A6E, 0x142AF544, 0xB19BE18F, 0xCB503E00);\
grsiVPERM_SB2[0] = _mm_set_epi32(0xC2A163C8, 0xAB82234A, 0x69EB8840, 0x0AE12900);\
grsiVPERM_SB2[1] = _mm_set_epi32(0x5EB7E955, 0xBC982FCD, 0xE27A93C6, 0x0B712400);\
grsiVPERM_SB4[0] = _mm_set_epi32(0xBA44FE79, 0x876D2914, 0x3D50AED7, 0xC393EA00);\
grsiVPERM_SB4[1] = _mm_set_epi32(0xA876DE97, 0x49087E9F, 0xE1E937A0, 0x3FD64100);\
}/**/
/* grsiVPERM
* Transform w/o settings c*
* transforms 2 rows to/from "vperm mode"
* this function is derived from:
* vperm and aes_ni implementations of hash function Grostl
* by Cagdas CALIK
* inputs:
* a0, a1 = 2 rows
* table = transformation table to use
* t*, c* = clobbers
* outputs:
* a0, a1 = 2 rows transformed with table
* */
#define grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2){\
t0 = c0;\
t1 = c0;\
t0 = _mm_andnot_si128(t0, a0);\
t1 = _mm_andnot_si128(t1, a1);\
t0 = _mm_srli_epi32(t0, 4);\
t1 = _mm_srli_epi32(t1, 4);\
a0 = _mm_and_si128(a0, c0);\
a1 = _mm_and_si128(a1, c0);\
t2 = c2;\
t3 = c2;\
t2 = _mm_shuffle_epi8(t2, a0);\
t3 = _mm_shuffle_epi8(t3, a1);\
a0 = c1;\
a1 = c1;\
a0 = _mm_shuffle_epi8(a0, t0);\
a1 = _mm_shuffle_epi8(a1, t1);\
a0 = _mm_xor_si128(a0, t2);\
a1 = _mm_xor_si128(a1, t3);\
}/**/
#define grsiVPERM_Transform_Set_Const(table, c0, c1, c2){\
c0 = grsiALL_0F;\
c1 = ((__m128i*) table )[0];\
c2 = ((__m128i*) table )[1];\
}/**/
/* grsiVPERM
* Transform
* transforms 2 rows to/from "vperm mode"
* this function is derived from:
* vperm and aes_ni implementations of hash function Grostl
* by Cagdas CALIK
* inputs:
* a0, a1 = 2 rows
* table = transformation table to use
* t*, c* = clobbers
* outputs:
* a0, a1 = 2 rows transformed with table
* */
#define grsiVPERM_Transform(a0, a1, table, t0, t1, t2, t3, c0, c1, c2){\
grsiVPERM_Transform_Set_Const(table, c0, c1, c2);\
grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2);\
}/**/
/* grsiVPERM
* Transform State
* inputs:
* a0-a3 = state
* table = transformation table to use
* t* = clobbers
* outputs:
* a0-a3 = transformed state
* */
#define grsiVPERM_Transform_State(a0, a1, a2, a3, table, t0, t1, t2, t3, c0, c1, c2){\
grsiVPERM_Transform_Set_Const(table, c0, c1, c2);\
grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2);\
grsiVPERM_Transform_No_Const(a2, a3, t0, t1, t2, t3, c0, c1, c2);\
}/**/
/* grsiVPERM
* Add Constant to State
* inputs:
* a0-a7 = state
* constant = constant to add
* t0 = clobber
* outputs:
* a0-a7 = state + constant
* */
#define grsiVPERM_Add_Constant(a0, a1, a2, a3, a4, a5, a6, a7, constant, t0){\
t0 = constant;\
a0 = _mm_xor_si128(a0, t0);\
a1 = _mm_xor_si128(a1, t0);\
a2 = _mm_xor_si128(a2, t0);\
a3 = _mm_xor_si128(a3, t0);\
a4 = _mm_xor_si128(a4, t0);\
a5 = _mm_xor_si128(a5, t0);\
a6 = _mm_xor_si128(a6, t0);\
a7 = _mm_xor_si128(a7, t0);\
}/**/
/* grsiVPERM
* Set Substitute Core Constants
* */
#define grsiVPERM_Substitute_Core_Set_Const(c0, c1, c2){\
grsiVPERM_Transform_Set_Const(grsiVPERM_INV, c0, c1, c2);\
}/**/
/* grsiVPERM
* Substitute Core
* first part of sbox inverse computation
* this function is derived from:
* vperm and aes_ni implementations of hash function Grostl
* by Cagdas CALIK
* inputs:
* a0 = 1 row
* t*, c* = clobbers
* outputs:
* b0a, b0b = inputs for lookup step
* */
#define grsiVPERM_Substitute_Core(a0, b0a, b0b, t0, t1, c0, c1, c2){\
t0 = c0;\
t0 = _mm_andnot_si128(t0, a0);\
t0 = _mm_srli_epi32(t0, 4);\
a0 = _mm_and_si128(a0, c0);\
b0a = c1;\
b0a = _mm_shuffle_epi8(b0a, a0);\
a0 = _mm_xor_si128(a0, t0);\
b0b = c2;\
b0b = _mm_shuffle_epi8(b0b, t0);\
b0b = _mm_xor_si128(b0b, b0a);\
t1 = c2;\
t1 = _mm_shuffle_epi8(t1, a0);\
t1 = _mm_xor_si128(t1, b0a);\
b0a = c2;\
b0a = _mm_shuffle_epi8(b0a, b0b);\
b0a = _mm_xor_si128(b0a, a0);\
b0b = c2;\
b0b = _mm_shuffle_epi8(b0b, t1);\
b0b = _mm_xor_si128(b0b, t0);\
}/**/
/* grsiVPERM
* Lookup
* second part of sbox inverse computation
* this function is derived from:
* vperm and aes_ni implementations of hash function Grostl
* by Cagdas CALIK
* inputs:
* a0a, a0b = output of Substitution Core
* table = lookup table to use (*1 / *2 / *4)
* t0 = clobber
* outputs:
* b0 = output of sbox + multiplication
* */
#define grsiVPERM_Lookup(a0a, a0b, table, b0, t0){\
b0 = ((__m128i*) table )[0];\
t0 = ((__m128i*) table )[1];\
b0 = _mm_shuffle_epi8(b0, a0b);\
t0 = _mm_shuffle_epi8(t0, a0a);\
b0 = _mm_xor_si128(b0, t0);\
}/**/
/* grsiVPERM
* SubBytes and *2 / *4
* this function is derived from:
* Constant-time SSSE3 AES core implementation
* by Mike Hamburg
* and
* vperm and aes_ni implementations of hash function Grostl
* by Cagdas CALIK
* inputs:
* a0-a7 = state
* t*, c* = clobbers
* outputs:
* a0-a7 = state * 4
* c2 = row0 * 2 -> b0
* c1 = row7 * 2 -> b3
* c0 = row7 * 1 -> b4
* t2 = row4 * 1 -> b7
* TEMP_MUL1 = row(i) * 1
* TEMP_MUL2 = row(i) * 2
*
* call:grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, b1, b2, b5, b6, b0, b3, b4, b7) */
#define grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t3, t4, c2, c1, c0, t2){\
/* set Constants */\
grsiVPERM_Substitute_Core_Set_Const(c0, c1, c2);\
/* row 1 */\
grsiVPERM_Substitute_Core(a1, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[1] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[1] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a1, t4);\
/* --- */\
/* row 2 */\
grsiVPERM_Substitute_Core(a2, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[2] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[2] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a2, t4);\
/* --- */\
/* row 3 */\
grsiVPERM_Substitute_Core(a3, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[3] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[3] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a3, t4);\
/* --- */\
/* row 5 */\
grsiVPERM_Substitute_Core(a5, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[5] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[5] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a5, t4);\
/* --- */\
/* row 6 */\
grsiVPERM_Substitute_Core(a6, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[6] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[6] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a6, t4);\
/* --- */\
/* row 7 */\
grsiVPERM_Substitute_Core(a7, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[7] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, c1, t4); /*c1 -> b3*/\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a7, t4);\
/* --- */\
/* row 4 */\
grsiVPERM_Substitute_Core(a4, t0, t1, t3, t4, c0, (grsiVPERM_INV[0]), c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4); /*t2 -> b7*/\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[4] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a4, t4);\
/* --- */\
/* row 0 */\
grsiVPERM_Substitute_Core(a0, t0, t1, t3, t4, c0, (grsiVPERM_INV[0]), c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, c0, t4); /*c0 -> b4*/\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, c2, t4); /*c2 -> b0*/\
TEMP_MUL2[0] = c2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a0, t4);\
/* --- */\
}/**/
/* Optimized grsiMixBytes
* inputs:
* a0-a7 = (row0-row7) * 4
* b0 = row0 * 2
* b3 = row7 * 2
* b4 = row7 * 1
* b7 = row4 * 1
* all *1 and *2 values must also be in TEMP_MUL1, TEMP_MUL2
* output: b0-b7
* */
#define grsiMixBytes(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
/* save one value */\
TEMP_MUL4 = a3;\
/* 1 */\
b1 = a0;\
b1 = _mm_xor_si128(b1, a5);\
b1 = _mm_xor_si128(b1, b4); /* -> helper! */\
b1 = _mm_xor_si128(b1, (TEMP_MUL2[3]));\
b2 = b1;\
\
/* 2 */\
b5 = a1;\
b5 = _mm_xor_si128(b5, a4);\
b5 = _mm_xor_si128(b5, b7); /* -> helper! */\
b5 = _mm_xor_si128(b5, b3); /* -> helper! */\
b6 = b5;\
\
/* 4 */\
b7 = _mm_xor_si128(b7, a6);\
/*b7 = _mm_xor_si128(b7, (TEMP_MUL1[4])); -> helper! */\
b7 = _mm_xor_si128(b7, (TEMP_MUL1[6]));\
b7 = _mm_xor_si128(b7, (TEMP_MUL2[1]));\
b7 = _mm_xor_si128(b7, b3); /* -> helper! */\
b2 = _mm_xor_si128(b2, b7);\
\
/* 3 */\
b0 = _mm_xor_si128(b0, a7);\
b0 = _mm_xor_si128(b0, (TEMP_MUL1[5]));\
b0 = _mm_xor_si128(b0, (TEMP_MUL1[7]));\
/*b0 = _mm_xor_si128(b0, (TEMP_MUL2[0])); -> helper! */\
b0 = _mm_xor_si128(b0, (TEMP_MUL2[2]));\
b3 = b0;\
b1 = _mm_xor_si128(b1, b0);\
b0 = _mm_xor_si128(b0, b7); /* moved from 4 */\
\
/* 5 */\
b4 = _mm_xor_si128(b4, a2);\
/*b4 = _mm_xor_si128(b4, (TEMP_MUL1[0])); -> helper! */\
b4 = _mm_xor_si128(b4, (TEMP_MUL1[2]));\
b4 = _mm_xor_si128(b4, (TEMP_MUL2[3]));\
b4 = _mm_xor_si128(b4, (TEMP_MUL2[5]));\
b3 = _mm_xor_si128(b3, b4);\
b6 = _mm_xor_si128(b6, b4);\
\
/* 6 */\
a3 = _mm_xor_si128(a3, (TEMP_MUL1[1]));\
a3 = _mm_xor_si128(a3, (TEMP_MUL1[3]));\
a3 = _mm_xor_si128(a3, (TEMP_MUL2[4]));\
a3 = _mm_xor_si128(a3, (TEMP_MUL2[6]));\
b4 = _mm_xor_si128(b4, a3);\
b5 = _mm_xor_si128(b5, a3);\
b7 = _mm_xor_si128(b7, a3);\
\
/* 7 */\
a1 = _mm_xor_si128(a1, (TEMP_MUL1[1]));\
a1 = _mm_xor_si128(a1, (TEMP_MUL2[4]));\
b2 = _mm_xor_si128(b2, a1);\
b3 = _mm_xor_si128(b3, a1);\
\
/* 8 */\
a5 = _mm_xor_si128(a5, (TEMP_MUL1[5]));\
a5 = _mm_xor_si128(a5, (TEMP_MUL2[0]));\
b6 = _mm_xor_si128(b6, a5);\
b7 = _mm_xor_si128(b7, a5);\
\
/* 9 */\
a3 = TEMP_MUL1[2];\
a3 = _mm_xor_si128(a3, (TEMP_MUL2[5]));\
b0 = _mm_xor_si128(b0, a3);\
b5 = _mm_xor_si128(b5, a3);\
\
/* 10 */\
a1 = TEMP_MUL1[6];\
a1 = _mm_xor_si128(a1, (TEMP_MUL2[1]));\
b1 = _mm_xor_si128(b1, a1);\
b4 = _mm_xor_si128(b4, a1);\
\
/* 11 */\
a5 = TEMP_MUL1[3];\
a5 = _mm_xor_si128(a5, (TEMP_MUL2[6]));\
b1 = _mm_xor_si128(b1, a5);\
b6 = _mm_xor_si128(b6, a5);\
\
/* 12 */\
a3 = TEMP_MUL1[7];\
a3 = _mm_xor_si128(a3, (TEMP_MUL2[2]));\
b2 = _mm_xor_si128(b2, a3);\
b5 = _mm_xor_si128(b5, a3);\
\
/* 13 */\
b0 = _mm_xor_si128(b0, (TEMP_MUL4));\
b0 = _mm_xor_si128(b0, a4);\
b1 = _mm_xor_si128(b1, a4);\
b3 = _mm_xor_si128(b3, a6);\
b4 = _mm_xor_si128(b4, a0);\
b4 = _mm_xor_si128(b4, a7);\
b5 = _mm_xor_si128(b5, a0);\
b7 = _mm_xor_si128(b7, a2);\
}/**/
/*
grsiSUBSH_MASK[0] = _mm_set_epi32(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);\
grsiSUBSH_MASK[1] = _mm_set_epi32(0x000f0e0d, 0x0c0b0a09, 0x08070605, 0x04030201);\
grsiSUBSH_MASK[2] = _mm_set_epi32(0x01000f0e, 0x0d0c0b0a, 0x09080706, 0x05040302);\
grsiSUBSH_MASK[3] = _mm_set_epi32(0x0201000f, 0x0e0d0c0b, 0x0a090807, 0x06050403);\
grsiSUBSH_MASK[4] = _mm_set_epi32(0x03020100, 0x0f0e0d0c, 0x0b0a0908, 0x07060504);\
grsiSUBSH_MASK[5] = _mm_set_epi32(0x04030201, 0x000f0e0d, 0x0c0b0a09, 0x08070605);\
grsiSUBSH_MASK[6] = _mm_set_epi32(0x05040302, 0x01000f0e, 0x0d0c0b0a, 0x09080706);\
grsiSUBSH_MASK[7] = _mm_set_epi32(0x0a090807, 0x06050403, 0x0201000f, 0x0e0d0c0b);\
*/
#define grsiSET_CONSTANTS(){\
grsiSET_SHARED_CONSTANTS();\
grsiALL_FF = _mm_set_epi32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);\
for(i = 0; i < grsiROUNDS1024; i++)\
{\
grsiROUND_CONST_P[i] = _mm_set_epi32(0xf0e0d0c0 ^ (i * 0x01010101), 0xb0a09080 ^ (i * 0x01010101), 0x70605040 ^ (i * 0x01010101), 0x30201000 ^ (i * 0x01010101));\
grsiROUND_CONST_Q[i] = _mm_set_epi32(0x0f1f2f3f ^ (i * 0x01010101), 0x4f5f6f7f ^ (i * 0x01010101), 0x8f9fafbf ^ (i * 0x01010101), 0xcfdfefff ^ (i * 0x01010101));\
}\
}/**/
/* one round
* a0-a7 = input rows
* b0-b7 = output rows
*/
#define grsiSUBMIX(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
/* SubBytes + Multiplication */\
grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, b1, b2, b5, b6, b0, b3, b4, b7);\
/* grsiMixBytes */\
grsiMixBytes(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7);\
}/**/
#define grsiROUNDS_P(){\
u32 round_counter;\
for(round_counter = 0; round_counter < 14; round_counter+=2) {\
/* AddRoundConstant P1024 */\
xmm8 = _mm_xor_si128(xmm8, (grsiROUND_CONST_P[round_counter]));\
/* ShiftBytes P1024 + pre-AESENCLAST */\
xmm8 = _mm_shuffle_epi8(xmm8, (grsiSUBSH_MASK[0]));\
xmm9 = _mm_shuffle_epi8(xmm9, (grsiSUBSH_MASK[1]));\
xmm10 = _mm_shuffle_epi8(xmm10, (grsiSUBSH_MASK[2]));\
xmm11 = _mm_shuffle_epi8(xmm11, (grsiSUBSH_MASK[3]));\
xmm12 = _mm_shuffle_epi8(xmm12, (grsiSUBSH_MASK[4]));\
xmm13 = _mm_shuffle_epi8(xmm13, (grsiSUBSH_MASK[5]));\
xmm14 = _mm_shuffle_epi8(xmm14, (grsiSUBSH_MASK[6]));\
xmm15 = _mm_shuffle_epi8(xmm15, (grsiSUBSH_MASK[7]));\
/* SubBytes + grsiMixBytes */\
grsiSUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
grsiVPERM_Add_Constant(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, grsiALL_15, xmm8);\
\
/* AddRoundConstant P1024 */\
xmm0 = _mm_xor_si128(xmm0, (grsiROUND_CONST_P[round_counter+1]));\
/* ShiftBytes P1024 + pre-AESENCLAST */\
xmm0 = _mm_shuffle_epi8(xmm0, (grsiSUBSH_MASK[0]));\
xmm1 = _mm_shuffle_epi8(xmm1, (grsiSUBSH_MASK[1]));\
xmm2 = _mm_shuffle_epi8(xmm2, (grsiSUBSH_MASK[2]));\
xmm3 = _mm_shuffle_epi8(xmm3, (grsiSUBSH_MASK[3]));\
xmm4 = _mm_shuffle_epi8(xmm4, (grsiSUBSH_MASK[4]));\
xmm5 = _mm_shuffle_epi8(xmm5, (grsiSUBSH_MASK[5]));\
xmm6 = _mm_shuffle_epi8(xmm6, (grsiSUBSH_MASK[6]));\
xmm7 = _mm_shuffle_epi8(xmm7, (grsiSUBSH_MASK[7]));\
/* SubBytes + grsiMixBytes */\
grsiSUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
grsiVPERM_Add_Constant(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, grsiALL_15, xmm0);\
}\
}/**/
/* grsiROUNDS_Q: all 14 rounds of Groestl-1024 permutation Q.
 * Same two-rounds-per-iteration structure as grsiROUNDS_P, but each
 * register is first XORed with grsiALL_FF (plus the per-round Q
 * constant on the last register) and a different grsiSUBSH_MASK
 * ordering implements Q's ShiftBytes. The grsiVPERM_Add_Constant
 * calls before and after the loop compensate the VPERM representation
 * once, where grsiROUNDS_P folds this into every iteration.
 * NOTE(review): the "= 0" initializer on round_counter is redundant —
 * the for statement reinitializes it; grsiROUNDS_P declares it
 * without one. */
#define grsiROUNDS_Q(){\
grsiVPERM_Add_Constant(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, grsiALL_15, xmm1);\
u32 round_counter = 0;\
for(round_counter = 0; round_counter < 14; round_counter+=2) {\
/* AddRoundConstant Q1024 */\
xmm1 = grsiALL_FF;\
xmm8 = _mm_xor_si128(xmm8, xmm1);\
xmm9 = _mm_xor_si128(xmm9, xmm1);\
xmm10 = _mm_xor_si128(xmm10, xmm1);\
xmm11 = _mm_xor_si128(xmm11, xmm1);\
xmm12 = _mm_xor_si128(xmm12, xmm1);\
xmm13 = _mm_xor_si128(xmm13, xmm1);\
xmm14 = _mm_xor_si128(xmm14, xmm1);\
xmm15 = _mm_xor_si128(xmm15, (grsiROUND_CONST_Q[round_counter]));\
/* ShiftBytes Q1024 + pre-AESENCLAST */\
xmm8 = _mm_shuffle_epi8(xmm8, (grsiSUBSH_MASK[1]));\
xmm9 = _mm_shuffle_epi8(xmm9, (grsiSUBSH_MASK[3]));\
xmm10 = _mm_shuffle_epi8(xmm10, (grsiSUBSH_MASK[5]));\
xmm11 = _mm_shuffle_epi8(xmm11, (grsiSUBSH_MASK[7]));\
xmm12 = _mm_shuffle_epi8(xmm12, (grsiSUBSH_MASK[0]));\
xmm13 = _mm_shuffle_epi8(xmm13, (grsiSUBSH_MASK[2]));\
xmm14 = _mm_shuffle_epi8(xmm14, (grsiSUBSH_MASK[4]));\
xmm15 = _mm_shuffle_epi8(xmm15, (grsiSUBSH_MASK[6]));\
/* SubBytes + grsiMixBytes */\
grsiSUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
\
/* AddRoundConstant Q1024 */\
xmm9 = grsiALL_FF;\
xmm0 = _mm_xor_si128(xmm0, xmm9);\
xmm1 = _mm_xor_si128(xmm1, xmm9);\
xmm2 = _mm_xor_si128(xmm2, xmm9);\
xmm3 = _mm_xor_si128(xmm3, xmm9);\
xmm4 = _mm_xor_si128(xmm4, xmm9);\
xmm5 = _mm_xor_si128(xmm5, xmm9);\
xmm6 = _mm_xor_si128(xmm6, xmm9);\
xmm7 = _mm_xor_si128(xmm7, (grsiROUND_CONST_Q[round_counter+1]));\
/* ShiftBytes Q1024 + pre-AESENCLAST */\
xmm0 = _mm_shuffle_epi8(xmm0, (grsiSUBSH_MASK[1]));\
xmm1 = _mm_shuffle_epi8(xmm1, (grsiSUBSH_MASK[3]));\
xmm2 = _mm_shuffle_epi8(xmm2, (grsiSUBSH_MASK[5]));\
xmm3 = _mm_shuffle_epi8(xmm3, (grsiSUBSH_MASK[7]));\
xmm4 = _mm_shuffle_epi8(xmm4, (grsiSUBSH_MASK[0]));\
xmm5 = _mm_shuffle_epi8(xmm5, (grsiSUBSH_MASK[2]));\
xmm6 = _mm_shuffle_epi8(xmm6, (grsiSUBSH_MASK[4]));\
xmm7 = _mm_shuffle_epi8(xmm7, (grsiSUBSH_MASK[6]));\
/* SubBytes + grsiMixBytes*/ \
grsiSUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
}\
grsiVPERM_Add_Constant(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, grsiALL_15, xmm1);\
}/**/
/* Matrix Transpose
 * input is a 1024-bit state with two columns in one xmm
 * output is a 1024-bit state with two rows in one xmm
 * inputs: i0-i7
 * outputs: i0-i7
 * clobbers: t0-t7
 *
 * Implementation: a per-register byte shuffle (grsiTRANSP_MASK)
 * followed by a three-level interleave network — 16-bit, 32-bit, then
 * 64-bit unpacks. Statement order is significant: the t-register
 * snapshots are taken at precise points so both unpack halves of each
 * source survive.
 */
#define grsiMatrix_Transpose(i0, i1, i2, i3, i4, i5, i6, i7, t0, t1, t2, t3, t4, t5, t6, t7){\
t0 = grsiTRANSP_MASK;\
\
i6 = _mm_shuffle_epi8(i6, t0);\
i0 = _mm_shuffle_epi8(i0, t0);\
i1 = _mm_shuffle_epi8(i1, t0);\
i2 = _mm_shuffle_epi8(i2, t0);\
i3 = _mm_shuffle_epi8(i3, t0);\
t1 = i2;\
i4 = _mm_shuffle_epi8(i4, t0);\
i5 = _mm_shuffle_epi8(i5, t0);\
t2 = i4;\
t3 = i6;\
i7 = _mm_shuffle_epi8(i7, t0);\
\
/* continue with unpack using 4 temp registers */\
t0 = i0;\
t2 = _mm_unpackhi_epi16(t2, i5);\
i4 = _mm_unpacklo_epi16(i4, i5);\
t3 = _mm_unpackhi_epi16(t3, i7);\
i6 = _mm_unpacklo_epi16(i6, i7);\
t0 = _mm_unpackhi_epi16(t0, i1);\
t1 = _mm_unpackhi_epi16(t1, i3);\
i2 = _mm_unpacklo_epi16(i2, i3);\
i0 = _mm_unpacklo_epi16(i0, i1);\
\
/* shuffle with immediate */\
t0 = _mm_shuffle_epi32(t0, 216);\
t1 = _mm_shuffle_epi32(t1, 216);\
t2 = _mm_shuffle_epi32(t2, 216);\
t3 = _mm_shuffle_epi32(t3, 216);\
i0 = _mm_shuffle_epi32(i0, 216);\
i2 = _mm_shuffle_epi32(i2, 216);\
i4 = _mm_shuffle_epi32(i4, 216);\
i6 = _mm_shuffle_epi32(i6, 216);\
\
/* continue with unpack */\
t4 = i0;\
i0 = _mm_unpacklo_epi32(i0, i2);\
t4 = _mm_unpackhi_epi32(t4, i2);\
t5 = t0;\
t0 = _mm_unpacklo_epi32(t0, t1);\
t5 = _mm_unpackhi_epi32(t5, t1);\
t6 = i4;\
i4 = _mm_unpacklo_epi32(i4, i6);\
t7 = t2;\
t6 = _mm_unpackhi_epi32(t6, i6);\
i2 = t0;\
t2 = _mm_unpacklo_epi32(t2, t3);\
i3 = t0;\
t7 = _mm_unpackhi_epi32(t7, t3);\
\
/* there are now 2 rows in each xmm */\
/* unpack to get 1 row of CV in each xmm */\
i1 = i0;\
i1 = _mm_unpackhi_epi64(i1, i4);\
i0 = _mm_unpacklo_epi64(i0, i4);\
i4 = t4;\
i3 = _mm_unpackhi_epi64(i3, t2);\
i5 = t4;\
i2 = _mm_unpacklo_epi64(i2, t2);\
i6 = t5;\
i5 = _mm_unpackhi_epi64(i5, t6);\
i7 = t5;\
i4 = _mm_unpacklo_epi64(i4, t6);\
i7 = _mm_unpackhi_epi64(i7, t7);\
i6 = _mm_unpacklo_epi64(i6, t7);\
/* transpose done */\
}/**/
/* Matrix Transpose Inverse
 * input is a 1024-bit state with two rows in one xmm
 * output is a 1024-bit state with two columns in one xmm
 * inputs: i0-i7
 * outputs: (i0, o0, i1, i3, o1, o2, i5, i7)
 * clobbers: t0-t4
 *
 * Reverse network of grsiMatrix_Transpose: 64-bit unpacks first, then
 * the grsiTRANSP_MASK byte shuffle, then 16-bit and 32-bit
 * interleaves. Note the scattered output register list above — the
 * final columns do NOT land in i0-i7 in order.
 */
#define grsiMatrix_Transpose_INV(i0, i1, i2, i3, i4, i5, i6, i7, o0, o1, o2, t0, t1, t2, t3, t4){\
/* transpose matrix to get output format */\
o1 = i0;\
i0 = _mm_unpacklo_epi64(i0, i1);\
o1 = _mm_unpackhi_epi64(o1, i1);\
t0 = i2;\
i2 = _mm_unpacklo_epi64(i2, i3);\
t0 = _mm_unpackhi_epi64(t0, i3);\
t1 = i4;\
i4 = _mm_unpacklo_epi64(i4, i5);\
t1 = _mm_unpackhi_epi64(t1, i5);\
t2 = i6;\
o0 = grsiTRANSP_MASK;\
i6 = _mm_unpacklo_epi64(i6, i7);\
t2 = _mm_unpackhi_epi64(t2, i7);\
/* load transpose mask into a register, because it will be used 8 times */\
i0 = _mm_shuffle_epi8(i0, o0);\
i2 = _mm_shuffle_epi8(i2, o0);\
i4 = _mm_shuffle_epi8(i4, o0);\
i6 = _mm_shuffle_epi8(i6, o0);\
o1 = _mm_shuffle_epi8(o1, o0);\
t0 = _mm_shuffle_epi8(t0, o0);\
t1 = _mm_shuffle_epi8(t1, o0);\
t2 = _mm_shuffle_epi8(t2, o0);\
/* continue with unpack using 4 temp registers */\
t3 = i4;\
o2 = o1;\
o0 = i0;\
t4 = t1;\
\
t3 = _mm_unpackhi_epi16(t3, i6);\
i4 = _mm_unpacklo_epi16(i4, i6);\
o0 = _mm_unpackhi_epi16(o0, i2);\
i0 = _mm_unpacklo_epi16(i0, i2);\
o2 = _mm_unpackhi_epi16(o2, t0);\
o1 = _mm_unpacklo_epi16(o1, t0);\
t4 = _mm_unpackhi_epi16(t4, t2);\
t1 = _mm_unpacklo_epi16(t1, t2);\
/* shuffle with immediate */\
i4 = _mm_shuffle_epi32(i4, 216);\
t3 = _mm_shuffle_epi32(t3, 216);\
o1 = _mm_shuffle_epi32(o1, 216);\
o2 = _mm_shuffle_epi32(o2, 216);\
i0 = _mm_shuffle_epi32(i0, 216);\
o0 = _mm_shuffle_epi32(o0, 216);\
t1 = _mm_shuffle_epi32(t1, 216);\
t4 = _mm_shuffle_epi32(t4, 216);\
/* continue with unpack */\
i1 = i0;\
i3 = o0;\
i5 = o1;\
i7 = o2;\
i0 = _mm_unpacklo_epi32(i0, i4);\
i1 = _mm_unpackhi_epi32(i1, i4);\
o0 = _mm_unpacklo_epi32(o0, t3);\
i3 = _mm_unpackhi_epi32(i3, t3);\
o1 = _mm_unpacklo_epi32(o1, t1);\
i5 = _mm_unpackhi_epi32(i5, t1);\
o2 = _mm_unpacklo_epi32(o2, t4);\
i7 = _mm_unpackhi_epi32(i7, t4);\
/* transpose done */\
}/**/
/* transform round constants into grsiVPERM mode */
/* Rewrites grsiROUND_CONST_P[i], [j] and grsiROUND_CONST_Q[i], [j] in
 * place; the Q constants are additionally XORed with grsiALL_15.
 * Clobbers xmm0-xmm3 (values) and xmm4-xmm10 (temporaries of
 * grsiVPERM_Transform_State). */
#define grsiVPERM_Transform_RoundConst_CNT2(i, j){\
xmm0 = grsiROUND_CONST_P[i];\
xmm1 = grsiROUND_CONST_P[j];\
xmm2 = grsiROUND_CONST_Q[i];\
xmm3 = grsiROUND_CONST_Q[j];\
grsiVPERM_Transform_State(xmm0, xmm1, xmm2, xmm3, grsiVPERM_IPT, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10);\
xmm2 = _mm_xor_si128(xmm2, (grsiALL_15));\
xmm3 = _mm_xor_si128(xmm3, (grsiALL_15));\
grsiROUND_CONST_P[i] = xmm0;\
grsiROUND_CONST_P[j] = xmm1;\
grsiROUND_CONST_Q[i] = xmm2;\
grsiROUND_CONST_Q[j] = xmm3;\
}/**/
/* transform round constants into grsiVPERM mode */
/* Converts all 14 P/Q round-constant pairs plus grsiALL_FF in place.
 * NOTE(review): this mutates the global tables; running it twice
 * without reloading the raw constants (grsiSET_CONSTANTS) would
 * double-transform them — verify callers always pair this with a
 * fresh grsiSET_CONSTANTS(), as grsiInit appears to do. */
#define grsiVPERM_Transform_RoundConst(){\
grsiVPERM_Transform_RoundConst_CNT2(0, 1);\
grsiVPERM_Transform_RoundConst_CNT2(2, 3);\
grsiVPERM_Transform_RoundConst_CNT2(4, 5);\
grsiVPERM_Transform_RoundConst_CNT2(6, 7);\
grsiVPERM_Transform_RoundConst_CNT2(8, 9);\
grsiVPERM_Transform_RoundConst_CNT2(10, 11);\
grsiVPERM_Transform_RoundConst_CNT2(12, 13);\
xmm0 = grsiALL_FF;\
grsiVPERM_Transform(xmm0, xmm1, grsiVPERM_IPT, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10);\
xmm0 = _mm_xor_si128(xmm0, (grsiALL_15));\
grsiALL_FF = xmm0;\
}/**/
/* grsiINIT: prepare the 128-byte chaining value h for hashing —
 * transforms the round-constant tables into VPERM mode, converts the
 * IV from column ordering into row ordering (VPERM transform + matrix
 * transpose), and writes the transposed IV back to h. */
IFUN void grsiINIT(u64* h)
#if !defined(DECLARE_IFUN)
;
#else
{
__m128i* const chaining = (__m128i*) h;
/* NOTE(review): static locals (register temporaries inherited from
   the original asm implementation) make this function non-reentrant
   and not thread-safe — confirm callers serialize access. */
static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
/* transform round constants into grsiVPERM mode */
grsiVPERM_Transform_RoundConst();
/* load IV into registers xmm8 - xmm15 */
xmm8 = chaining[0];
xmm9 = chaining[1];
xmm10 = chaining[2];
xmm11 = chaining[3];
xmm12 = chaining[4];
xmm13 = chaining[5];
xmm14 = chaining[6];
xmm15 = chaining[7];
/* transform chaining value from column ordering into row ordering */
grsiVPERM_Transform_State(xmm8, xmm9, xmm10, xmm11, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
grsiVPERM_Transform_State(xmm12, xmm13, xmm14, xmm15, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
grsiMatrix_Transpose(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
/* store transposed IV */
chaining[0] = xmm8;
chaining[1] = xmm9;
chaining[2] = xmm10;
chaining[3] = xmm11;
chaining[4] = xmm12;
chaining[5] = xmm13;
chaining[6] = xmm14;
chaining[7] = xmm15;
}
#endif
/* grsiTF1024: Groestl-1024 compression function.
 * Updates the 128-byte chaining value h with the 128-byte message
 * block m:  h <- P(h ^ m) ^ Q(m) ^ h, computed in row ordering (the
 * message is converted via VPERM transform + matrix transpose on
 * entry; h is assumed to already be in the transposed format produced
 * by grsiINIT). */
IFUN void grsiTF1024(u64* h, u64* m)
#if !defined(DECLARE_IFUN)
;
#else
{
__m128i* const chaining = (__m128i*) h;
__m128i* const message = (__m128i*) m;
/* NOTE(review): static temporaries and spill slots (inherited from
   the original asm code) make this function non-reentrant and not
   thread-safe — confirm callers serialize access. TEMP_MUL1/2/4 are
   referenced inside the MixBytes macros. */
static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
static __m128i TEMP_MUL1[8];
static __m128i TEMP_MUL2[8];
static __m128i TEMP_MUL4;
static __m128i QTEMP[8];
/* load message into registers xmm8 - xmm15 (Q = message) */
xmm8 = message[0];
xmm9 = message[1];
xmm10 = message[2];
xmm11 = message[3];
xmm12 = message[4];
xmm13 = message[5];
xmm14 = message[6];
xmm15 = message[7];
/* transform message M from column ordering into row ordering */
grsiVPERM_Transform_State(xmm8, xmm9, xmm10, xmm11, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
grsiVPERM_Transform_State(xmm12, xmm13, xmm14, xmm15, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
grsiMatrix_Transpose(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
/* store message M (Q input) for later */
QTEMP[0] = xmm8;
QTEMP[1] = xmm9;
QTEMP[2] = xmm10;
QTEMP[3] = xmm11;
QTEMP[4] = xmm12;
QTEMP[5] = xmm13;
QTEMP[6] = xmm14;
QTEMP[7] = xmm15;
/* xor CV to message to get P input */
/* result: CV+M in xmm8...xmm15 */
xmm8 = _mm_xor_si128(xmm8, (chaining[0]));
xmm9 = _mm_xor_si128(xmm9, (chaining[1]));
xmm10 = _mm_xor_si128(xmm10, (chaining[2]));
xmm11 = _mm_xor_si128(xmm11, (chaining[3]));
xmm12 = _mm_xor_si128(xmm12, (chaining[4]));
xmm13 = _mm_xor_si128(xmm13, (chaining[5]));
xmm14 = _mm_xor_si128(xmm14, (chaining[6]));
xmm15 = _mm_xor_si128(xmm15, (chaining[7]));
/* compute permutation P */
/* result: P(CV+M) in xmm8...xmm15 */
grsiROUNDS_P();
/* xor CV to P output (feed-forward) */
/* result: P(CV+M)+CV in xmm8...xmm15 */
xmm8 = _mm_xor_si128(xmm8, (chaining[0]));
xmm9 = _mm_xor_si128(xmm9, (chaining[1]));
xmm10 = _mm_xor_si128(xmm10, (chaining[2]));
xmm11 = _mm_xor_si128(xmm11, (chaining[3]));
xmm12 = _mm_xor_si128(xmm12, (chaining[4]));
xmm13 = _mm_xor_si128(xmm13, (chaining[5]));
xmm14 = _mm_xor_si128(xmm14, (chaining[6]));
xmm15 = _mm_xor_si128(xmm15, (chaining[7]));
/* store P(CV+M)+CV */
chaining[0] = xmm8;
chaining[1] = xmm9;
chaining[2] = xmm10;
chaining[3] = xmm11;
chaining[4] = xmm12;
chaining[5] = xmm13;
chaining[6] = xmm14;
chaining[7] = xmm15;
/* load message M (Q input) into xmm8-15 */
xmm8 = QTEMP[0];
xmm9 = QTEMP[1];
xmm10 = QTEMP[2];
xmm11 = QTEMP[3];
xmm12 = QTEMP[4];
xmm13 = QTEMP[5];
xmm14 = QTEMP[6];
xmm15 = QTEMP[7];
/* compute permutation Q */
/* result: Q(M) in xmm8...xmm15 */
grsiROUNDS_Q();
/* xor Q output */
/* result: P(CV+M)+CV+Q(M) in xmm8...xmm15 */
xmm8 = _mm_xor_si128(xmm8, (chaining[0]));
xmm9 = _mm_xor_si128(xmm9, (chaining[1]));
xmm10 = _mm_xor_si128(xmm10, (chaining[2]));
xmm11 = _mm_xor_si128(xmm11, (chaining[3]));
xmm12 = _mm_xor_si128(xmm12, (chaining[4]));
xmm13 = _mm_xor_si128(xmm13, (chaining[5]));
xmm14 = _mm_xor_si128(xmm14, (chaining[6]));
xmm15 = _mm_xor_si128(xmm15, (chaining[7]));
/* store CV */
chaining[0] = xmm8;
chaining[1] = xmm9;
chaining[2] = xmm10;
chaining[3] = xmm11;
chaining[4] = xmm12;
chaining[5] = xmm13;
chaining[6] = xmm14;
chaining[7] = xmm15;
return;
}
#endif
/* grsiOF1024: output transformation, h <- trunc(P(h) ^ h).
 * Applies permutation P with feed-forward, converts back from row to
 * column ordering, and writes only the last four 128-bit words
 * (chaining[4..7]) — the truncated 512-bit digest half. */
IFUN void grsiOF1024(u64* h)
#if !defined(DECLARE_IFUN)
;
#else
{
__m128i* const chaining = (__m128i*) h;
/* NOTE(review): static temporaries make this non-reentrant — see
   grsiTF1024. */
static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
static __m128i TEMP_MUL1[8];
static __m128i TEMP_MUL2[8];
static __m128i TEMP_MUL4;
/* load CV into registers xmm8 - xmm15 */
xmm8 = chaining[0];
xmm9 = chaining[1];
xmm10 = chaining[2];
xmm11 = chaining[3];
xmm12 = chaining[4];
xmm13 = chaining[5];
xmm14 = chaining[6];
xmm15 = chaining[7];
/* compute permutation P */
/* result: P(CV) in xmm8...xmm15 */
grsiROUNDS_P();
/* xor CV to P output (feed-forward) */
/* result: P(CV)+CV in xmm8...xmm15 */
xmm8 = _mm_xor_si128(xmm8, (chaining[0]));
xmm9 = _mm_xor_si128(xmm9, (chaining[1]));
xmm10 = _mm_xor_si128(xmm10, (chaining[2]));
xmm11 = _mm_xor_si128(xmm11, (chaining[3]));
xmm12 = _mm_xor_si128(xmm12, (chaining[4]));
xmm13 = _mm_xor_si128(xmm13, (chaining[5]));
xmm14 = _mm_xor_si128(xmm14, (chaining[6]));
xmm15 = _mm_xor_si128(xmm15, (chaining[7]));
/* transpose CV back from row ordering to column ordering */
/* result: final hash value in xmm0, xmm6, xmm13, xmm15 */
grsiMatrix_Transpose_INV(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm4, xmm0, xmm6, xmm1, xmm2, xmm3, xmm5, xmm7);
grsiVPERM_Transform_State(xmm0, xmm6, xmm13, xmm15, grsiVPERM_OPT, xmm1, xmm2, xmm3, xmm5, xmm7, xmm10, xmm12);
/* we only need to return the truncated half of the state */
chaining[4] = xmm0;
chaining[5] = xmm6;
chaining[6] = xmm13;
chaining[7] = xmm15;
return;
}
#endif

View File

@@ -1,273 +0,0 @@
/* hash.c Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
#include "grsi.h"
#include "grsi-asm.h"
/* void grsiInit(grsiState* ctx) { */
/* GRS_I: initialize the shared Groestl-512 context sts_grs in place.
 * Expects sts_grs (a grsiState) and the grsi* constants/macros to be
 * in scope at the expansion site.
 * Cleanup vs the original: the unused "u8 i" and the always-false
 * NULL check were removed — grsichaining/grsibuffer are arrays inside
 * the struct, so their addresses can never be NULL. */
#define GRS_I \
do { \
  grsiState *ctx = &sts_grs; \
 \
  /* set number of state columns and state size depending on \
     variant */ \
  ctx->grsicolumns = grsiCOLS; \
  ctx->grsistatesize = grsiSIZE; \
  ctx->grsiv = LONG; \
 \
  /* load the raw (untransformed) round-constant tables */ \
  grsiSET_CONSTANTS(); \
 \
  /* clear chaining value and data buffer */ \
  memset(ctx->grsichaining, 0, sizeof(u64)*grsiSIZE/8); \
  memset(ctx->grsibuffer, 0, sizeof(grsiBitSequence)*grsiSIZE); \
 \
  /* set initial value: big-endian digest length in the last column */ \
  ctx->grsichaining[ctx->grsicolumns-1] = grsiU64BIG((u64)grsiLENGTH); \
 \
  grsiINIT(ctx->grsichaining); \
 \
  /* reset buffer bookkeeping */ \
  ctx->grsibuf_ptr = 0; \
  ctx->grsiblock_counter = 0; \
  ctx->grsibits_in_last_byte = 0; \
 \
} while (0)
/* Digest up to len bytes of input; only whole grsiSIZE-byte blocks
 * are consumed (callers keep partial blocks buffered). */
void grsiTransform(grsiState *ctx,
                   const u8 *in,
                   unsigned long long len) {
  /* account for every full block about to be digested */
  ctx->grsiblock_counter += len / grsiSIZE;

  /* feed the compression function one block at a time */
  while (len >= grsiSIZE) {
    grsiTF1024((u64*)ctx->grsichaining, (u64*)in);
    in  += grsiSIZE;
    len -= grsiSIZE;
  }

  /* emms: leave the FPU/MMX register state clean afterwards */
  asm volatile ("emms");
}
/* Output transformation: given state h, do h <- P(h)+h
 * (truncation to the digest is handled by the caller). */
void grsiOutputTransformation(grsiState *ctx) {
  u64 *state = (u64*)ctx->grsichaining;

  grsiOF1024(state);

  /* emms: leave the FPU/MMX register state clean afterwards */
  asm volatile ("emms");
}
/* Initialise a Groestl-512 context: select the LONG (1024-bit state)
 * variant, zero the chaining value and buffer, seed the last column
 * with the big-endian digest length, and run grsiINIT to put the IV
 * into the transposed/VPERM representation.
 * Cleanup vs the original: the always-false NULL check was removed —
 * grsichaining/grsibuffer are arrays embedded in the struct, so their
 * addresses can never be NULL. */
void grsiInit(grsiState* ctx) {
  u8 i;

  /* output size (in bits) must be a positive integer less than or
     equal to 512, and divisible by 8 */
  if (grsiLENGTH <= 0 || (grsiLENGTH%8) || grsiLENGTH > 512)
    return;

  /* set number of state columns and state size depending on variant */
  ctx->grsicolumns = grsiCOLS;
  ctx->grsistatesize = grsiSIZE;
  ctx->grsiv = LONG;

  grsiSET_CONSTANTS();

  /* zero chaining value and data buffer */
  for (i=0; i<grsiSIZE/8; i++)
    ctx->grsichaining[i] = 0;
  for (i=0; i<grsiSIZE; i++)
    ctx->grsibuffer[i] = 0;

  /* set initial value: big-endian digest length in the last column */
  ctx->grsichaining[ctx->grsicolumns-1] = grsiU64BIG((u64)grsiLENGTH);

  grsiINIT(ctx->grsichaining);

  /* set other variables */
  ctx->grsibuf_ptr = 0;
  ctx->grsiblock_counter = 0;
  ctx->grsibits_in_last_byte = 0;
}
/* Update the state with databitlen bits of input. Buffers partial
 * blocks; digests full grsiSIZE-byte blocks via grsiTransform. A
 * non-integral number of bytes may only be supplied in the final call.
 * Fix vs the original: a stray debug printf("error\n") fired on the
 * normal path every time a previously-buffered partial block was
 * flushed; it has been removed.
 * NOTE(review): byte counts are tracked in int, so a single update of
 * more than ~2^31 bits would overflow — verify callers never pass
 * such lengths. */
void grsiUpdate(grsiState* ctx,
                const grsiBitSequence* input,
                grsiDataLength databitlen) {
  int index = 0;
  int msglen = (int)(databitlen/8);
  int rem = (int)(databitlen%8);

  /* non-integral number of message bytes can only be supplied in the
     last call to this function */
  if (ctx->grsibits_in_last_byte) return;

  /* if the buffer contains data that has not yet been digested, first
     add data to buffer until full */
  if (ctx->grsibuf_ptr) {
    while (ctx->grsibuf_ptr < ctx->grsistatesize && index < msglen) {
      ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
    }
    if (ctx->grsibuf_ptr < ctx->grsistatesize) {
      /* buffer still not full, return */
      if (rem) {
        ctx->grsibits_in_last_byte = rem;
        ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
      }
      return;
    }

    /* digest the now-full buffer */
    ctx->grsibuf_ptr = 0;
    grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
  }

  /* digest bulk of message */
  grsiTransform(ctx, input+index, msglen-index);
  index += ((msglen-index)/ctx->grsistatesize)*ctx->grsistatesize;

  /* store remaining data in buffer */
  while (index < msglen) {
    ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
  }

  /* if a non-integral number of bytes has been supplied, store the
     remaining bits in the last byte, together with how many there are */
  if (rem) {
    ctx->grsibits_in_last_byte = rem;
    ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
  }
}
/* Update the state with exactly 64 bytes (512 bits) of input.
 * Fix vs the original: this was a verbatim copy of grsiUpdate with
 * databitlen hard-coded to 64*8 — including its stray debug
 * printf("error\n"). The duplication is removed by delegating to
 * grsiUpdate, which is behaviorally identical for this input size. */
void grsiUpdateq(grsiState* ctx, const grsiBitSequence* input)
{
  grsiUpdate(ctx, input, (grsiDataLength)(64*8));
}
/* shorthand for the partial-byte bit count tracked in the context */
#define BILB ctx->grsibits_in_last_byte
/* finalise: process remaining data (including padding), perform
   output transformation, and write hash result to 'output'.
   Padding scheme: a single '1' bit, '0' bits up to the length field,
   then the 8-byte (grsiLENGTHFIELDLEN) block counter written
   big-endian (least significant byte last); may require digesting a
   second padding block. The digest is the last grsiLENGTH/8 bytes of
   the state; the context is zeroised afterwards. */
void grsiFinal(grsiState* ctx,
               grsiBitSequence* output) {
int i, j = 0, grsibytelen = grsiLENGTH/8;
u8 *s = (grsiBitSequence*)ctx->grsichaining;
/* pad with '1'-bit and first few '0'-bits */
if (BILB) {
ctx->grsibuffer[(int)ctx->grsibuf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
ctx->grsibuffer[(int)ctx->grsibuf_ptr-1] ^= 0x1<<(7-BILB);
BILB = 0;
}
else ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0x80;
/* pad with '0'-bits */
if (ctx->grsibuf_ptr > ctx->grsistatesize-grsiLENGTHFIELDLEN) {
/* padding requires two blocks */
while (ctx->grsibuf_ptr < ctx->grsistatesize) {
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0;
}
/* digest first padding block */
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
ctx->grsibuf_ptr = 0;
}
while (ctx->grsibuf_ptr < ctx->grsistatesize-grsiLENGTHFIELDLEN) {
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0;
}
/* length padding */
ctx->grsiblock_counter++;
ctx->grsibuf_ptr = ctx->grsistatesize;
while (ctx->grsibuf_ptr > ctx->grsistatesize-grsiLENGTHFIELDLEN) {
ctx->grsibuffer[(int)--ctx->grsibuf_ptr] = (u8)ctx->grsiblock_counter;
ctx->grsiblock_counter >>= 8;
}
/* digest final padding block */
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
/* perform output transformation */
grsiOutputTransformation(ctx);
/* store hash result in output (truncated tail of the state) */
for (i = ctx->grsistatesize-grsibytelen; i < ctx->grsistatesize; i++,j++) {
output[j] = s[i];
}
/* zeroise relevant variables and deallocate memory */
for (i = 0; i < ctx->grsicolumns; i++) {
ctx->grsichaining[i] = 0;
}
for (i = 0; i < ctx->grsistatesize; i++) {
ctx->grsibuffer[i] = 0;
}
// free(ctx->grsichaining);
// free(ctx->grsibuffer);
return;
}

View File

@@ -1,79 +0,0 @@
/* hash.h Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
/* NOTE(review): guard name __grsi_h uses a leading double underscore,
   which is reserved for the implementation — consider GRSI_H. */
#ifndef __grsi_h
#define __grsi_h
#include <stdio.h>
#include <stdlib.h>
#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"
/* digest length in bits; 512 selects the LONG (1024-bit state) variant */
#define grsiLENGTH 512
/* some sizes (number of bytes) */
#define grsiROWS 8
#define grsiLENGTHFIELDLEN grsiROWS
#define grsiCOLS512 8
#define grsiCOLS1024 16
#define grsiSIZE512 (grsiROWS*grsiCOLS512)
#define grsiSIZE1024 (grsiROWS*grsiCOLS1024)
#define grsiROUNDS512 10
#define grsiROUNDS1024 14
#if grsiLENGTH<=256
#define grsiCOLS grsiCOLS512
#define grsiSIZE grsiSIZE512
#define grsiROUNDS grsiROUNDS512
#else
#define grsiCOLS grsiCOLS1024
#define grsiSIZE grsiSIZE1024
#define grsiROUNDS grsiROUNDS1024
#endif
/* 64-bit rotate left */
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define grsiEXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
#define grsiU64BIG(a) (a)
#endif /* IS_BIG_ENDIAN */
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#define grsiEXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
/* byte-swap a u64 to big-endian via masked rotates */
#define grsiU64BIG(a) \
((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
(ROTL64(a,24) & li_64(0000FF000000FF00)) | \
(ROTL64(a,40) & li_64(00FF000000FF0000)) | \
(ROTL64(a,56) & li_64(FF000000FF000000)))
#endif /* IS_LITTLE_ENDIAN */
typedef enum { LONG, SHORT } grsiVar;
/* NIST API begin */
typedef unsigned char grsiBitSequence;
typedef unsigned long long grsiDataLength;
typedef struct {
__attribute__ ((aligned (32))) u64 grsichaining[grsiSIZE/8]; /* actual state */
__attribute__ ((aligned (32))) grsiBitSequence grsibuffer[grsiSIZE]; /* data buffer */
u64 grsiblock_counter; /* message block counter */
int grsibuf_ptr; /* data buffer pointer */
int grsibits_in_last_byte; /* no. of message bits in last byte of
data buffer */
int grsicolumns; /* no. of columns in state */
int grsistatesize; /* total no. of bytes in state */
grsiVar grsiv; /* LONG or SHORT */
} grsiState;
void grsiInit(grsiState*);
void grsiUpdate(grsiState*, const grsiBitSequence*, grsiDataLength);
void grsiFinal(grsiState*, grsiBitSequence*);
/* NIST API end */
#endif /* __grsi_h */

File diff suppressed because it is too large Load Diff

View File

@@ -1,247 +0,0 @@
/* hash.c Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
#include "grsn-asm.h"
/* Digest up to len bytes of input; only whole grsnSIZE-byte blocks
 * are consumed (callers keep partial blocks buffered). */
void grsnTransform(grsnState *ctx,
                   const u8 *in,
                   unsigned long long len) {
  /* account for every full block about to be digested */
  ctx->block_counter += len / grsnSIZE;

  /* feed the variant-appropriate compression function one block at a time */
  while (len >= grsnSIZE) {
#if grsnLENGTH<=256
    TF512((u64*)ctx->chaining, (u64*)in);
#else
    TF1024((u64*)ctx->chaining, (u64*)in);
#endif
    in  += grsnSIZE;
    len -= grsnSIZE;
  }

  /* emms: leave the FPU/MMX register state clean afterwards */
  asm volatile ("emms");
}
/* Output transformation: given state h, do h <- P(h)+h
 * (variant selected at compile time by grsnLENGTH). */
void grsnOutputTransformation(grsnState *ctx) {
  u64 *state = (u64*)ctx->chaining;

#if (grsnLENGTH <= 256)
  OF512(state);
#else
  OF1024(state);
#endif

  /* emms: leave the FPU/MMX register state clean afterwards */
  asm volatile ("emms");
}
/* Initialise a Groestl context: select the variant from grsnLENGTH,
 * zero the chaining value and buffer, seed the last column with the
 * big-endian digest length, and run INIT.
 * Cleanup vs the original: the always-false NULL check was removed —
 * chaining/buffer are arrays embedded in the struct, so their
 * addresses can never be NULL. */
void grsnInit(grsnState* ctx) {
  u8 i;

  /* output size (in bits) must be a positive integer less than or
     equal to 512, and divisible by 8 */
  if (grsnLENGTH <= 0 || (grsnLENGTH%8) || grsnLENGTH > 512)
    return;

  /* set number of state columns and state size depending on variant */
  ctx->columns = grsnCOLS;
  ctx->statesize = grsnSIZE;
#if (grsnLENGTH <= 256)
  ctx->v = SHORT;
#else
  ctx->v = LONG;
#endif

  SET_CONSTANTS();

  /* zero chaining value and data buffer */
  for (i=0; i<grsnSIZE/8; i++)
    ctx->chaining[i] = 0;
  for (i=0; i<grsnSIZE; i++)
    ctx->buffer[i] = 0;

  /* set initial value: big-endian digest length in the last column */
  ctx->chaining[ctx->columns-1] = U64BIG((u64)grsnLENGTH);

  INIT(ctx->chaining);

  /* set other variables */
  ctx->buf_ptr = 0;
  ctx->block_counter = 0;
  ctx->bits_in_last_byte = 0;
}
/* Update the state with databitlen bits of input. Buffers partial
 * blocks; digests full grsnSIZE-byte blocks via grsnTransform. A
 * non-integral number of bytes may only be supplied in the final call.
 * Fix vs the original: a stray debug printf("error\n") fired on the
 * normal path every time a previously-buffered partial block was
 * flushed; it has been removed. */
void grsnUpdate(grsnState* ctx,
                const BitSequence* input,
                DataLength databitlen) {
  int index = 0;
  int msglen = (int)(databitlen/8);
  int rem = (int)(databitlen%8);

  /* non-integral number of message bytes can only be supplied in the
     last call to this function */
  if (ctx->bits_in_last_byte) return;

  /* if the buffer contains data that has not yet been digested, first
     add data to buffer until full */
  if (ctx->buf_ptr) {
    while (ctx->buf_ptr < ctx->statesize && index < msglen) {
      ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
    }
    if (ctx->buf_ptr < ctx->statesize) {
      /* buffer still not full, return */
      if (rem) {
        ctx->bits_in_last_byte = rem;
        ctx->buffer[(int)ctx->buf_ptr++] = input[index];
      }
      return;
    }

    /* digest the now-full buffer */
    ctx->buf_ptr = 0;
    grsnTransform(ctx, ctx->buffer, ctx->statesize);
  }

  /* digest bulk of message */
  grsnTransform(ctx, input+index, msglen-index);
  index += ((msglen-index)/ctx->statesize)*ctx->statesize;

  /* store remaining data in buffer */
  while (index < msglen) {
    ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
  }

  /* if a non-integral number of bytes has been supplied, store the
     remaining bits in the last byte, together with how many there are */
  if (rem) {
    ctx->bits_in_last_byte = rem;
    ctx->buffer[(int)ctx->buf_ptr++] = input[index];
  }
}
/* Update the state with exactly 64 bytes (512 bits) of input.
 * Fixes vs the original: (1) it was a near-verbatim copy of grsnUpdate
 * with the length hard-coded, including its stray debug
 * printf("error\n") — the duplication is removed by delegating;
 * (2) unlike grsnUpdate (and the grsi variant grsiUpdateq), it was
 * missing the bits_in_last_byte guard, which delegation restores for
 * consistency. */
void grsnUpdateq(grsnState* ctx, const BitSequence* input)
{
  grsnUpdate(ctx, input, (DataLength)(64*8));
}
/* shorthand for the partial-byte bit count tracked in the context */
#define BILB ctx->bits_in_last_byte
/* finalise: process remaining data (including padding), perform
   output transformation, and write hash result to 'output'.
   Padding scheme: a single '1' bit, '0' bits up to the length field,
   then the 8-byte (grsnLENGTHFIELDLEN) block counter written
   big-endian (least significant byte last); may require digesting a
   second padding block. The digest is the last grsnLENGTH/8 bytes of
   the state; the context is zeroised afterwards. */
void grsnFinal(grsnState* ctx,
               BitSequence* output) {
int i, j = 0, grsnbytelen = grsnLENGTH/8;
u8 *s = (BitSequence*)ctx->chaining;
/* pad with '1'-bit and first few '0'-bits */
if (BILB) {
ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
BILB = 0;
}
else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
/* pad with '0'-bits */
if (ctx->buf_ptr > ctx->statesize-grsnLENGTHFIELDLEN) {
/* padding requires two blocks */
while (ctx->buf_ptr < ctx->statesize) {
ctx->buffer[(int)ctx->buf_ptr++] = 0;
}
/* digest first padding block */
grsnTransform(ctx, ctx->buffer, ctx->statesize);
ctx->buf_ptr = 0;
}
while (ctx->buf_ptr < ctx->statesize-grsnLENGTHFIELDLEN) {
ctx->buffer[(int)ctx->buf_ptr++] = 0;
}
/* length padding */
ctx->block_counter++;
ctx->buf_ptr = ctx->statesize;
while (ctx->buf_ptr > ctx->statesize-grsnLENGTHFIELDLEN) {
ctx->buffer[(int)--ctx->buf_ptr] = (u8)ctx->block_counter;
ctx->block_counter >>= 8;
}
/* digest final padding block */
grsnTransform(ctx, ctx->buffer, ctx->statesize);
/* perform output transformation */
grsnOutputTransformation(ctx);
/* store hash result in output (truncated tail of the state) */
for (i = ctx->statesize-grsnbytelen; i < ctx->statesize; i++,j++) {
output[j] = s[i];
}
/* zeroise relevant variables and deallocate memory */
for (i = 0; i < ctx->columns; i++) {
ctx->chaining[i] = 0;
}
for (i = 0; i < ctx->statesize; i++) {
ctx->buffer[i] = 0;
}
// free(ctx->chaining);
// free(ctx->buffer);
return;
}

View File

@@ -1,80 +0,0 @@
/* hash.h Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
/* NOTE(review): guard name __grsn_h uses a leading double underscore,
   which is reserved for the implementation — consider GRSN_H. */
#ifndef __grsn_h
#define __grsn_h
#include <stdio.h>
#include <stdlib.h>
#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"
/* digest length in bits; <=256 selects the SHORT (512-bit state)
   variant, otherwise the LONG (1024-bit state) variant */
#ifndef grsnLENGTH
#define grsnLENGTH 512
#endif
/* some sizes (number of bytes) */
#define grsnROWS 8
#define grsnLENGTHFIELDLEN grsnROWS
#define grsnCOLS512 8
#define grsnCOLS1024 16
#define grsnSIZE512 (grsnROWS*grsnCOLS512)
#define grsnSIZE1024 (grsnROWS*grsnCOLS1024)
#define grsnROUNDS512 10
#define grsnROUNDS1024 14
#if grsnLENGTH<=256
#define grsnCOLS grsnCOLS512
#define grsnSIZE grsnSIZE512
#define grsnROUNDS grsnROUNDS512
#else
#define grsnCOLS grsnCOLS1024
#define grsnSIZE grsnSIZE1024
#define grsnROUNDS grsnROUNDS1024
#endif
/* 64-bit rotate left */
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
#define U64BIG(a) (a)
#endif /* IS_BIG_ENDIAN */
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
/* byte-swap a u64 to big-endian via masked rotates */
#define U64BIG(a) \
((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
(ROTL64(a,24) & li_64(0000FF000000FF00)) | \
(ROTL64(a,40) & li_64(00FF000000FF0000)) | \
(ROTL64(a,56) & li_64(FF000000FF000000)))
#endif /* IS_LITTLE_ENDIAN */
typedef enum { LONG, SHORT } Var;
/* NIST API begin */
typedef unsigned char BitSequence;
typedef unsigned long long DataLength;
typedef struct {
__attribute__ ((aligned (32))) u64 chaining[grsnSIZE/8]; /* actual state */
__attribute__ ((aligned (32))) BitSequence buffer[grsnSIZE]; /* data buffer */
u64 block_counter; /* message block counter */
int buf_ptr; /* data buffer pointer */
int bits_in_last_byte; /* no. of message bits in last byte of
data buffer */
int columns; /* no. of columns in state */
int statesize; /* total no. of bytes in state */
Var v; /* LONG or SHORT */
} grsnState;
void grsnInit(grsnState*);
void grsnUpdate(grsnState*, const BitSequence*, DataLength);
void grsnFinal(grsnState*, BitSequence*);
#endif /* __grsn_h */

File diff suppressed because it is too large Load Diff

View File

@@ -1,10 +0,0 @@
/* Prototypes for the assembly implementations of the Groestl-1024
   P and Q permutations (mmx variant); x points at the state words —
   presumably transformed in place, verify against the asm source. */
#ifndef GRSOASM_H
#define GRSOASM_H
#include "grso.h"
void grsoP1024ASM (u64 *x) ;
void grsoQ1024ASM (u64 *x) ;
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +0,0 @@
/* Prototypes for the assembly implementations of the Groestl-1024
   P and Q permutations (sse variant).
   NOTE(review): this guard name collides with the mmx variant's
   header — harmless while only one variant is compiled in, but the
   two headers can never be included together; verify the build only
   ever selects one. */
#ifndef GRSOASM_H
#define GRSOASM_H
/* really same as the mmx asm.h */
/* made just in case something must be changed */
#include "grso.h"
void grsoP1024ASM (u64 *x) ;
void grsoQ1024ASM (u64 *x) ;
#endif

View File

@@ -1,110 +0,0 @@
/* hash.c January 2011
 *
 * Groestl-512 implementation with inline assembly containing mmx and
 * sse instructions. Optimized for Opteron.
 * Authors: Krystian Matusiewicz and Soeren S. Thomsen
 *
 * This code is placed in the public domain
 */
//#include "grso.h"
//#include "grso-asm.h"
// #include "grsotab.h"
#define DECL_GRS
/* load initial constants */
/* GRS_I: reset the shared state sts_grs for a new hash; expects
   sts_grs and the grso* constants to be in scope at the expansion
   site.
   NOTE(review): the trailing backslash after "while (0);" splices the
   NEXT source line into this macro — harmless while that line is a
   comment (comments are stripped before splicing matters), but
   fragile; verify before reordering the lines that follow. */
#define GRS_I \
do { \
  int i; \
  /* set initial value */ \
  for (i = 0; i < grsoCOLS-1; i++) sts_grs.grsstate[i] = 0; \
  sts_grs.grsstate[grsoCOLS-1] = grsoU64BIG((u64)(8*grsoDIGESTSIZE)); \
 \
  /* set other variables */ \
  sts_grs.grsbuf_ptr = 0; \
  sts_grs.grsblock_counter = 0; \
} while (0); \
/* load hash */
/* GRS_U: absorb the 64-byte buffer 'hash' into sts_grs, buffering any
   partial block in 'hashbuf'; expects hash, hashbuf and sts_grs in
   scope at the expansion site.
   NOTE: the 'continue' below is the macro's early-exit — inside
   do { ... } while (0) it jumps to the (false) loop condition and
   leaves the macro without executing the rest. */
#define GRS_U \
do { \
  unsigned char* in = hash; \
  unsigned long long index = 0; \
 \
  /* if the buffer contains data that has not yet been digested, first \
     add data to buffer until full */ \
  if (sts_grs.grsbuf_ptr) { \
    while (sts_grs.grsbuf_ptr < grsoSIZE && index < 64) { \
      hashbuf[(int)sts_grs.grsbuf_ptr++] = in[index++]; \
    } \
    if (sts_grs.grsbuf_ptr < grsoSIZE) continue; \
 \
    /* digest buffer */ \
    sts_grs.grsbuf_ptr = 0; \
    grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
  } \
 \
  /* digest bulk of message */ \
  grsoTransform(&sts_grs, in+index, 64-index); \
  index += ((64-index)/grsoSIZE)*grsoSIZE; \
 \
  /* store remaining data in buffer */ \
  while (index < 64) { \
    hashbuf[(int)sts_grs.grsbuf_ptr++] = in[index++]; \
  } \
 \
} while (0);
/* groestl512 hash loaded */
/* hash = groestl512(loaded) */
#define GRS_C \
do { \
char *out = hash; \
int i, j = 0; \
unsigned char *s = (unsigned char*)sts_grs.grsstate; \
\
hashbuf[sts_grs.grsbuf_ptr++] = 0x80; \
\
/* pad with '0'-bits */ \
if (sts_grs.grsbuf_ptr > grsoSIZE-grsoLENGTHFIELDLEN) { \
/* padding requires two blocks */ \
while (sts_grs.grsbuf_ptr < grsoSIZE) { \
hashbuf[sts_grs.grsbuf_ptr++] = 0; \
} \
/* digest first padding block */ \
grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
sts_grs.grsbuf_ptr = 0; \
} \
while (sts_grs.grsbuf_ptr < grsoSIZE-grsoLENGTHFIELDLEN) { \
hashbuf[sts_grs.grsbuf_ptr++] = 0; \
} \
\
/* length padding */ \
sts_grs.grsblock_counter++; \
sts_grs.grsbuf_ptr = grsoSIZE; \
while (sts_grs.grsbuf_ptr > grsoSIZE-grsoLENGTHFIELDLEN) { \
hashbuf[--sts_grs.grsbuf_ptr] = (unsigned char)sts_grs.grsblock_counter; \
sts_grs.grsblock_counter >>= 8; \
} \
\
/* digest final padding block */ \
grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
/* perform output transformation */ \
grsoOutputTransformation(&sts_grs); \
\
/* store hash result in output */ \
for (i = grsoSIZE-grsoDIGESTSIZE; i < grsoSIZE; i++,j++) { \
out[j] = s[i]; \
} \
\
/* zeroise relevant variables and deallocate memory */ \
for (i = 0; i < grsoCOLS; i++) { \
sts_grs.grsstate[i] = 0; \
} \
for (i = 0; i < grsoSIZE; i++) { \
hashbuf[i] = 0; \
} \
} while (0);

View File

@@ -1,57 +0,0 @@
/* hash.c January 2011
*
* Groestl-512 implementation with inline assembly containing mmx and
* sse instructions. Optimized for Opteron.
* Authors: Krystian Matusiewicz and Soeren S. Thomsen
*
* This code is placed in the public domain
*/
#include "algo/groestl/sse2/grso-asm.h"
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grsotab.h"
/* digest up to len bytes of input (full blocks only) */
void grsoTransform(grsoState *ctx,
const unsigned char *in,
unsigned long long len) {
u64 y[grsoCOLS+2] __attribute__ ((aligned (16)));
u64 z[grsoCOLS+2] __attribute__ ((aligned (16)));
u64 *m, *h = (u64*)ctx->grsstate;
int i;
/* increment block counter */
ctx->grsblock_counter += len/grsoSIZE;
/* digest message, one block at a time */
for (; len >= grsoSIZE; len -= grsoSIZE, in += grsoSIZE) {
m = (u64*)in;
for (i = 0; i < grsoCOLS; i++) {
y[i] = m[i];
z[i] = m[i] ^ h[i];
}
grsoQ1024ASM(y);
grsoP1024ASM(z);
/* h' == h + Q(m) + P(h+m) */
for (i = 0; i < grsoCOLS; i++) {
h[i] ^= z[i] ^ y[i];
}
}
}
/* given state h, do h <- P(h)+h */
void grsoOutputTransformation(grsoState *ctx) {
u64 z[grsoCOLS] __attribute__ ((aligned (16)));
int j;
for (j = 0; j < grsoCOLS; j++) {
z[j] = ctx->grsstate[j];
}
grsoP1024ASM(z);
for (j = 0; j < grsoCOLS; j++) {
ctx->grsstate[j] ^= z[j];
}
}

View File

@@ -1,62 +0,0 @@
#ifndef __hash_h
#define __hash_h
#include <stdio.h>
#include <stdlib.h>
#include "brg_endian.h"
#include "brg_types.h"
/* some sizes (number of bytes) */
#define grsoROWS 8
#define grsoLENGTHFIELDLEN grsoROWS
#define grsoCOLS 16
#define grsoSIZE (grsoROWS*grsoCOLS)
#define grsoDIGESTSIZE 64
#define grsoROUNDS 14
#define grsoROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&((u64)0xffffffffffffffffULL))
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#error
#endif /* IS_BIG_ENDIAN */
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
#define grsoU64BIG(a) \
((grsoROTL64(a, 8) & ((u64)0x000000ff000000ffULL)) | \
(grsoROTL64(a,24) & ((u64)0x0000ff000000ff00ULL)) | \
(grsoROTL64(a,40) & ((u64)0x00ff000000ff0000ULL)) | \
(grsoROTL64(a,56) & ((u64)0xff000000ff000000ULL)))
#endif /* IS_LITTLE_ENDIAN */
typedef struct {
u64 grsstate[grsoCOLS]; /* actual state */
u64 grsblock_counter; /* message block counter */
int grsbuf_ptr; /* data buffer pointer */
} grsoState;
//extern int grsoInit(grsoState* ctx);
//extern int grsoUpdate(grsoState* ctx, const unsigned char* in,
// unsigned long long len);
//extern int grsoUpdateq(grsoState* ctx, const unsigned char* in);
//extern int grsoFinal(grsoState* ctx,
// unsigned char* out);
//
//extern int grsohash(unsigned char *out,
// const unsigned char *in,
// unsigned long long len);
/* digest up to len bytes of input (full blocks only) */
void grsoTransform( grsoState *ctx, const unsigned char *in,
unsigned long long len );
/* given state h, do h <- P(h)+h */
void grsoOutputTransformation( grsoState *ctx );
int grso_init ( grsoState* sts_grs );
int grso_update ( grsoState* sts_grs, char* hashbuf, char* hash );
int grso_close ( grsoState *sts_grs, char* hashbuf, char* hash );
#endif /* __hash_h */

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -1,45 +0,0 @@
/*
* file : hash_api.h
* version : 1.0.208
* date : 14.12.2010
*
* Grostl multi-stream bitsliced implementation Hash API
*
* Cagdas Calik
* ccalik@metu.edu.tr
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
*
*/
#ifndef GRSS_API_H
#define GRSS_API_H
#include "sha3_common.h"
#include <tmmintrin.h>
typedef struct
{
__m128i state1[8];
__m128i state2[8];
__m128i state3[8];
__m128i state4[8];
__m128i _Pconst[14][8];
__m128i _Qconst[14][8];
__m128i _shiftconst[8];
unsigned int uHashLength;
unsigned int uBlockLength;
BitSequence buffer[128];
} grssState;
void grssInit(grssState *state, int grssbitlen);
void grssUpdate(grssState *state, const BitSequence *data, DataLength databitlen);
void grssFinal(grssState *state, BitSequence *grssval);
#endif // HASH_API_H

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -1,202 +0,0 @@
/* hash.c Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
#include "grsv.h"
#include "grsv-asm.h"
/* digest up to len bytes of input (full blocks only) */
void grsvTransform(grsvState *ctx,
const u8 *in,
unsigned long long len) {
/* increment block counter */
ctx->grsvblock_counter += len/grsvSIZE;
/* digest message, one block at a time */
for (; len >= grsvSIZE; len -= grsvSIZE, in += grsvSIZE)
#if grsvLENGTH<=256
grsvTF512((u64*)ctx->grsvchaining, (u64*)in);
#else
grsvTF1024((u64*)ctx->grsvchaining, (u64*)in);
#endif
asm volatile ("emms");
}
/* given state h, do h <- P(h)+h */
void grsvOutputTransformation(grsvState *ctx) {
/* determine variant */
#if (grsvLENGTH <= 256)
grsvOF512((u64*)ctx->grsvchaining);
#else
grsvOF1024((u64*)ctx->grsvchaining);
#endif
asm volatile ("emms");
}
/* initialise context */
void grsvInit(grsvState* ctx) {
u8 i = 0;
/* output size (in bits) must be a positive integer less than or
equal to 512, and divisible by 8 */
if (grsvLENGTH <= 0 || (grsvLENGTH%8) || grsvLENGTH > 512)
return;
/* set number of state columns and state size depending on
variant */
ctx->grsvcolumns = grsvCOLS;
ctx->grsvstatesize = grsvSIZE;
#if (grsvLENGTH <= 256)
ctx->grsvv = SHORT;
#else
ctx->grsvv = LONG;
#endif
SET_CONSTANTS();
for (i=0; i<grsvSIZE/8; i++)
ctx->grsvchaining[i] = 0;
for (i=0; i<grsvSIZE; i++)
ctx->grsvbuffer[i] = 0;
if (ctx->grsvchaining == NULL || ctx->grsvbuffer == NULL)
return;
/* set initial value */
ctx->grsvchaining[ctx->grsvcolumns-1] = U64BIG((u64)grsvLENGTH);
grsvINIT(ctx->grsvchaining);
/* set other variables */
ctx->grsvbuf_ptr = 0;
ctx->grsvblock_counter = 0;
ctx->grsvbits_in_last_byte = 0;
return;
}
/* update state with databitlen bits of input */
void grsvUpdate(grsvState* ctx,
const grsvBitSequence* input,
grsvDataLength databitlen) {
int index = 0;
int msglen = (int)(databitlen/8);
int rem = (int)(databitlen%8);
/* non-integral number of message bytes can only be supplied in the
last call to this function */
if (ctx->grsvbits_in_last_byte) return;
/* if the buffer contains data that has not yet been digested, first
add data to buffer until full */
if (ctx->grsvbuf_ptr) {
while (ctx->grsvbuf_ptr < ctx->grsvstatesize && index < msglen) {
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index++];
}
if (ctx->grsvbuf_ptr < ctx->grsvstatesize) {
/* buffer still not full, return */
if (rem) {
ctx->grsvbits_in_last_byte = rem;
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index];
}
return;
}
/* digest buffer */
ctx->grsvbuf_ptr = 0;
printf("error\n");
grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
}
/* digest bulk of message */
grsvTransform(ctx, input+index, msglen-index);
index += ((msglen-index)/ctx->grsvstatesize)*ctx->grsvstatesize;
/* store remaining data in buffer */
while (index < msglen) {
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index++];
}
/* if non-integral number of bytes have been supplied, store
remaining bits in last byte, together with information about
number of bits */
if (rem) {
ctx->grsvbits_in_last_byte = rem;
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index];
}
return;
}
#define BILB ctx->grsvbits_in_last_byte
/* finalise: process remaining data (including padding), perform
output transformation, and write hash result to 'output' */
void grsvFinal(grsvState* ctx,
grsvBitSequence* output) {
int i, j = 0, grsvbytelen = grsvLENGTH/8;
u8 *s = (grsvBitSequence*)ctx->grsvchaining;
/* pad with '1'-bit and first few '0'-bits */
if (BILB) {
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr-1] ^= 0x1<<(7-BILB);
BILB = 0;
}
else ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0x80;
/* pad with '0'-bits */
if (ctx->grsvbuf_ptr > ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
/* padding requires two blocks */
while (ctx->grsvbuf_ptr < ctx->grsvstatesize) {
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0;
}
/* digest first padding block */
grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
ctx->grsvbuf_ptr = 0;
}
while (ctx->grsvbuf_ptr < ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0;
}
/* length padding */
ctx->grsvblock_counter++;
ctx->grsvbuf_ptr = ctx->grsvstatesize;
while (ctx->grsvbuf_ptr > ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
ctx->grsvbuffer[(int)--ctx->grsvbuf_ptr] = (u8)ctx->grsvblock_counter;
ctx->grsvblock_counter >>= 8;
}
/* digest final padding block */
grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
/* perform output transformation */
grsvOutputTransformation(ctx);
/* store hash result in output */
for (i = ctx->grsvstatesize-grsvbytelen; i < ctx->grsvstatesize; i++,j++) {
output[j] = s[i];
}
/* zeroise relevant variables and deallocate memory */
for (i = 0; i < ctx->grsvcolumns; i++) {
ctx->grsvchaining[i] = 0;
}
for (i = 0; i < ctx->grsvstatesize; i++) {
ctx->grsvbuffer[i] = 0;
}
// free(ctx->grsvchaining);
// free(ctx->buffer);
return;
}

View File

@@ -1,77 +0,0 @@
/* hash.h Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
#ifndef __grsv_h
#define __grsv_h
#include <stdio.h>
#include <stdlib.h>
#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"
#define grsvLENGTH 512
/* some sizes (number of bytes) */
#define grsvROWS 8
#define grsvLENGTHFIELDLEN grsvROWS
#define grsvCOLS512 8
#define grsvCOLS1024 16
#define grsvSIZE512 (grsvROWS*grsvCOLS512)
#define grsvSIZE1024 (grsvROWS*grsvCOLS1024)
#define grsvROUNDS512 10
#define grsvROUNDS1024 14
#if grsvLENGTH<=256
#define grsvCOLS grsvCOLS512
#define grsvSIZE grsvSIZE512
#define grsvROUNDS grsvROUNDS512
#else
#define grsvCOLS grsvCOLS1024
#define grsvSIZE grsvSIZE1024
#define grsvROUNDS grsvROUNDS1024
#endif
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
#define U64BIG(a) (a)
#endif /* IS_BIG_ENDIAN */
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
#define U64BIG(a) \
((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
(ROTL64(a,24) & li_64(0000FF000000FF00)) | \
(ROTL64(a,40) & li_64(00FF000000FF0000)) | \
(ROTL64(a,56) & li_64(FF000000FF000000)))
#endif /* IS_LITTLE_ENDIAN */
typedef enum { LONG, SHORT } grsvVar;
typedef unsigned char grsvBitSequence;
typedef unsigned long long grsvDataLength;
typedef struct {
__attribute__ ((aligned (32))) u64 grsvchaining[grsvSIZE/8]; /* actual state */
__attribute__ ((aligned (32))) grsvBitSequence grsvbuffer[grsvSIZE]; /* data buffer */
u64 grsvblock_counter; /* message block counter */
int grsvbuf_ptr; /* data buffer pointer */
int grsvbits_in_last_byte; /* no. of message bits in last byte of
data buffer */
int grsvcolumns; /* no. of columns in state */
int grsvstatesize; /* total no. of bytes in state */
grsvVar grsvv; /* LONG or SHORT */
} grsvState;
void grsvInit(grsvState*);
void grsvUpdate(grsvState*, const grsvBitSequence*, grsvDataLength);
void grsvFinal(grsvState*, grsvBitSequence*);
#endif /* __grsv_h */

View File

@@ -23,10 +23,7 @@
#include "algo/sha2/sph-sha2.h" #include "algo/sha2/sph-sha2.h"
#include "algo/haval/sph-haval.h" #include "algo/haval/sph-haval.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/aes_ni/hash_api.h"
#endif #endif
@@ -34,38 +31,31 @@
#include "algo/luffa/sse2/luffa_for_sse2.h" #include "algo/luffa/sse2/luffa_for_sse2.h"
#include "algo/cubehash/sse2/cubehash_sse2.h" #include "algo/cubehash/sse2/cubehash_sse2.h"
#include "algo/simd/sse2/nist.h" #include "algo/simd/sse2/nist.h"
//#include "algo/blake/sse2/blake.c"
//#include "algo/keccak/sse2/keccak.c"
//#include "algo/bmw/sse2/bmw.c"
//#include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h" #include "algo/jh/sse2/jh_sse2_opt64.h"
typedef struct { typedef struct {
sph_blake512_context blake1, blake2; sph_blake512_context blake1, blake2;
sph_bmw512_context bmw1, bmw2, bmw3; sph_bmw512_context bmw1, bmw2, bmw3;
sph_skein512_context skein1, skein2; sph_skein512_context skein1, skein2;
sph_jh512_context jh1, jh2; sph_jh512_context jh1, jh2;
sph_keccak512_context keccak1, keccak2; sph_keccak512_context keccak1, keccak2;
// sph_luffa512_context luffa1, luffa2; hashState_luffa luffa1, luffa2;
hashState_luffa luffa1, luffa2; cubehashParam cube;
// sph_cubehash512_context cube1, cube2; sph_shavite512_context shavite1, shavite2;
cubehashParam cube; hashState_sd simd1, simd2;
sph_shavite512_context shavite1, shavite2; sph_hamsi512_context hamsi1;
// sph_simd512_context simd1, simd2; sph_fugue512_context fugue1, fugue2;
hashState_sd simd1, simd2; sph_shabal512_context shabal1;
sph_hamsi512_context hamsi1; sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
sph_fugue512_context fugue1, fugue2; sph_sha512_context sha1, sha2;
sph_shabal512_context shabal1; sph_haval256_5_context haval1, haval2;
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
sph_sha512_context sha1, sha2;
sph_haval256_5_context haval1, haval2;
#ifdef NO_AES_NI #ifdef NO_AES_NI
sph_groestl512_context groestl1, groestl2; sph_groestl512_context groestl1, groestl2;
sph_echo512_context echo1, echo2; sph_echo512_context echo1, echo2;
#else #else
hashState_echo echo1, echo2; hashState_echo echo1, echo2;
hashState_groestl groestl1, groestl2; hashState_groestl groestl1, groestl2;
#endif #endif
} hmq1725_ctx_holder; } hmq1725_ctx_holder;
@@ -90,19 +80,14 @@ void init_hmq1725_ctx()
sph_keccak512_init(&hmq1725_ctx.keccak1); sph_keccak512_init(&hmq1725_ctx.keccak1);
sph_keccak512_init(&hmq1725_ctx.keccak2); sph_keccak512_init(&hmq1725_ctx.keccak2);
// sph_luffa512_init(&hmq1725_ctx.luffa1);
// sph_luffa512_init(&hmq1725_ctx.luffa2);
init_luffa( &hmq1725_ctx.luffa1, 512 ); init_luffa( &hmq1725_ctx.luffa1, 512 );
init_luffa( &hmq1725_ctx.luffa2, 512 ); init_luffa( &hmq1725_ctx.luffa2, 512 );
// sph_cubehash512_init(&hmq1725_ctx.cubehash1);
cubehashInit( &hmq1725_ctx.cube, 512, 16, 32 ); cubehashInit( &hmq1725_ctx.cube, 512, 16, 32 );
sph_shavite512_init(&hmq1725_ctx.shavite1); sph_shavite512_init(&hmq1725_ctx.shavite1);
sph_shavite512_init(&hmq1725_ctx.shavite2); sph_shavite512_init(&hmq1725_ctx.shavite2);
// sph_simd512_init(&hmq1725_ctx.simd1);
// sph_simd512_init(&hmq1725_ctx.simd2);
init_sd( &hmq1725_ctx.simd1, 512 ); init_sd( &hmq1725_ctx.simd1, 512 );
init_sd( &hmq1725_ctx.simd2, 512 ); init_sd( &hmq1725_ctx.simd2, 512 );
@@ -135,46 +120,18 @@ void init_hmq1725_ctx()
init_groestl( &hmq1725_ctx.groestl1 ); init_groestl( &hmq1725_ctx.groestl1 );
init_groestl( &hmq1725_ctx.groestl2 ); init_groestl( &hmq1725_ctx.groestl2 );
#endif #endif
} }
extern void hmq1725hash(void *state, const void *input) extern void hmq1725hash(void *state, const void *input)
{ {
hmq1725_ctx_holder ctx;
memcpy(&ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
size_t hashptr;
// DATA_ALIGNXY(sph_u64 hashctA,8);
// DATA_ALIGNXY(sph_u64 hashctB,8);
// DATA_ALIGNXY(unsigned char hash[128],16);
unsigned char hashbuf[128];
sph_u64 hashctA;
sph_u64 hashctB;
const uint32_t mask = 24; const uint32_t mask = 24;
uint32_t hashA[25], hashB[25];
hmq1725_ctx_holder ctx;
//these uint512 in the c++ source of the client are backed by an array of uint32 memcpy(&ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
uint32_t hashA[25], hashB[25];
// unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
// #define hashA hash
// #define hashB (hash+64)
sph_bmw512 (&ctx.bmw1, input, 80); //0 sph_bmw512 (&ctx.bmw1, input, 80); //0
sph_bmw512_close(&ctx.bmw1, hashA); //1 sph_bmw512_close(&ctx.bmw1, hashA); //1
/*
DECL_BMW;
BMW_I;
BMW_U;
#define M(x) sph_dec64le_aligned(data + 8 * (x))
#define H(x) (h[x])
#define dH(x) (dh[x])
BMW_C;
#undef M
#undef H
#undef dH
*/
sph_whirlpool (&ctx.whirlpool1, hashA, 64); //0 sph_whirlpool (&ctx.whirlpool1, hashA, 64); //0
sph_whirlpool_close(&ctx.whirlpool1, hashB); //1 sph_whirlpool_close(&ctx.whirlpool1, hashB); //1
@@ -182,8 +139,8 @@ extern void hmq1725hash(void *state, const void *input)
if ( hashB[0] & mask ) //1 if ( hashB[0] & mask ) //1
{ {
#ifdef NO_AES_NI #ifdef NO_AES_NI
sph_groestl512 (&ctx.groestl1, hashB, 64); //1 sph_groestl512 (&ctx.groestl1, hashB, 64); //1
sph_groestl512_close(&ctx.groestl1, hashA); //2 sph_groestl512_close(&ctx.groestl1, hashA); //2
#else #else
update_groestl( &ctx.groestl1, (char*)hashB, 512 ); update_groestl( &ctx.groestl1, (char*)hashB, 512 );
final_groestl( &ctx.groestl1, (char*)hashA ); final_groestl( &ctx.groestl1, (char*)hashA );
@@ -191,8 +148,8 @@ extern void hmq1725hash(void *state, const void *input)
} }
else else
{ {
sph_skein512 (&ctx.skein1, hashB, 64); //1 sph_skein512 (&ctx.skein1, hashB, 64); //1
sph_skein512_close(&ctx.skein1, hashA); //2 sph_skein512_close(&ctx.skein1, hashA); //2
} }
sph_jh512 (&ctx.jh1, hashA, 64); //3 sph_jh512 (&ctx.jh1, hashA, 64); //3
@@ -212,13 +169,9 @@ extern void hmq1725hash(void *state, const void *input)
sph_bmw512_close(&ctx.bmw2, hashB); //5 sph_bmw512_close(&ctx.bmw2, hashB); //5
} }
// sph_luffa512 (&ctx.luffa1, hashB, 64); //5
// sph_luffa512_close(&ctx.luffa1, hashA); //6
update_luffa( &ctx.luffa1, (BitSequence*)hashB, 512 ); update_luffa( &ctx.luffa1, (BitSequence*)hashB, 512 );
final_luffa( &ctx.luffa1, (BitSequence*)hashA ); final_luffa( &ctx.luffa1, (BitSequence*)hashA );
// sph_cubehash512 (&ctx.cubehash1, hashA, 64); //6
// sph_cubehash512_close(&ctx.cubehash1, hashB); //7
cubehashUpdate( &ctx.cube, (BitSequence *)hashA, 64 ); cubehashUpdate( &ctx.cube, (BitSequence *)hashA, 64 );
cubehashDigest( &ctx.cube, (BitSequence *)hashB ); cubehashDigest( &ctx.cube, (BitSequence *)hashB );
@@ -233,14 +186,11 @@ extern void hmq1725hash(void *state, const void *input)
sph_jh512_close(&ctx.jh2, hashA); //8 sph_jh512_close(&ctx.jh2, hashA); //8
} }
sph_shavite512 (&ctx.shavite1, hashA, 64); //3 sph_shavite512 (&ctx.shavite1, hashA, 64); //3
sph_shavite512_close(&ctx.shavite1, hashB); //4 sph_shavite512_close(&ctx.shavite1, hashB); //4
// sph_simd512 (&ctx.simd1, hashB, 64); //2 update_sd( &ctx.simd1, (BitSequence *)hashB, 512 );
// sph_simd512_close(&ctx.simd1, hashA); //3 final_sd( &ctx.simd1, (BitSequence *)hashA );
update_sd( &ctx.simd1, (BitSequence *)hashB, 512 );
final_sd( &ctx.simd1, (BitSequence *)hashA );
if ( hashA[0] & mask ) //4 if ( hashA[0] & mask ) //4
{ {
@@ -258,8 +208,8 @@ extern void hmq1725hash(void *state, const void *input)
sph_echo512 (&ctx.echo1, hashB, 64); //5 sph_echo512 (&ctx.echo1, hashB, 64); //5
sph_echo512_close(&ctx.echo1, hashA); //6 sph_echo512_close(&ctx.echo1, hashA); //6
#else #else
update_echo ( &ctx.echo1, (BitSequence *)hashB, 512 ); update_echo ( &ctx.echo1, (BitSequence *)hashB, 512 );
final_echo( &ctx.echo1, (BitSequence *)hashA ); final_echo( &ctx.echo1, (BitSequence *)hashA );
#endif #endif
sph_blake512 (&ctx.blake2, hashA, 64); //6 sph_blake512 (&ctx.blake2, hashA, 64); //6
@@ -272,8 +222,6 @@ extern void hmq1725hash(void *state, const void *input)
} }
else else
{ {
// sph_luffa512 (&ctx.luffa2, hashB, 64); //7
// sph_luffa512_close(&ctx.luffa2, hashA); //8
update_luffa( &ctx.luffa2, (BitSequence *)hashB, 512 ); update_luffa( &ctx.luffa2, (BitSequence *)hashB, 512 );
final_luffa( &ctx.luffa2, (BitSequence *)hashA ); final_luffa( &ctx.luffa2, (BitSequence *)hashA );
} }
@@ -287,8 +235,8 @@ extern void hmq1725hash(void *state, const void *input)
if ( hashA[0] & mask ) //4 if ( hashA[0] & mask ) //4
{ {
#ifdef NO_AES_NI #ifdef NO_AES_NI
sph_echo512 (&ctx.echo2, hashA, 64); // sph_echo512 (&ctx.echo2, hashA, 64); //
sph_echo512_close(&ctx.echo2, hashB); //5 sph_echo512_close(&ctx.echo2, hashB); //5
#else #else
update_echo ( &ctx.echo2, (BitSequence *)hashA, 512 ); update_echo ( &ctx.echo2, (BitSequence *)hashA, 512 );
final_echo( &ctx.echo2, (BitSequence *)hashB ); final_echo( &ctx.echo2, (BitSequence *)hashB );
@@ -296,8 +244,6 @@ extern void hmq1725hash(void *state, const void *input)
} }
else else
{ {
// sph_simd512 (&ctx.simd2, hashA, 64); //4
// sph_simd512_close(&ctx.simd2, hashB); //5
update_sd( &ctx.simd2, (BitSequence *)hashA, 512 ); update_sd( &ctx.simd2, (BitSequence *)hashA, 512 );
final_sd( &ctx.simd2, (BitSequence *)hashB ); final_sd( &ctx.simd2, (BitSequence *)hashB );
} }
@@ -323,8 +269,8 @@ extern void hmq1725hash(void *state, const void *input)
sph_groestl512 (&ctx.groestl2, hashA, 64); //3 sph_groestl512 (&ctx.groestl2, hashA, 64); //3
sph_groestl512_close(&ctx.groestl2, hashB); //4 sph_groestl512_close(&ctx.groestl2, hashB); //4
#else #else
update_groestl( &ctx.groestl2, (char*)hashA, 512 ); update_groestl( &ctx.groestl2, (char*)hashA, 512 );
final_groestl( &ctx.groestl2, (char*)hashB ); final_groestl( &ctx.groestl2, (char*)hashB );
#endif #endif
sph_sha512 (&ctx.sha2, hashB, 64); //2 sph_sha512 (&ctx.sha2, hashB, 64); //2

View File

@@ -7,6 +7,7 @@
#include <stdio.h> #include <stdio.h>
#include "algo/blake/sph_blake.h" #include "algo/blake/sph_blake.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/skein/sph_skein.h" #include "algo/skein/sph_skein.h"
#include "algo/jh/sph_jh.h" #include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h" #include "algo/keccak/sph_keccak.h"
@@ -16,15 +17,14 @@
#include "algo/skein/sse2/skein.c" #include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h" #include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#endif #endif
typedef struct { typedef struct {
#ifndef NO_AES_NI #ifdef NO_AES_NI
sph_groestl512_context groestl;
#else
hashState_groestl groestl; hashState_groestl groestl;
#endif #endif
} nist5_ctx_holder; } nist5_ctx_holder;
@@ -33,16 +33,15 @@ nist5_ctx_holder nist5_ctx;
void init_nist5_ctx() void init_nist5_ctx()
{ {
#ifndef NO_AES_NI #ifdef NO_AES_NI
sph_groestl512_init( &nist5_ctx.groestl );
#else
init_groestl( &nist5_ctx.groestl ); init_groestl( &nist5_ctx.groestl );
#endif #endif
} }
void nist5hash(void *output, const void *input) void nist5hash(void *output, const void *input)
{ {
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
size_t hashptr; size_t hashptr;
unsigned char hashbuf[128]; unsigned char hashbuf[128];
sph_u64 hashctA; sph_u64 hashctA;
@@ -54,16 +53,14 @@ void nist5hash(void *output, const void *input)
nist5_ctx_holder ctx; nist5_ctx_holder ctx;
memcpy( &ctx, &nist5_ctx, sizeof(nist5_ctx) ); memcpy( &ctx, &nist5_ctx, sizeof(nist5_ctx) );
DECL_BLK; DECL_BLK;
BLK_I; BLK_I;
BLK_W; BLK_W;
BLK_C; BLK_C;
#ifdef NO_AES_NI #ifdef NO_AES_NI
GRS_I; sph_groestl512 (&ctx.groestl, hash, 64);
GRS_U; sph_groestl512_close(&ctx.groestl, hash);
GRS_C;
#else #else
update_groestl( &ctx.groestl, (char*)hash,512); update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash); final_groestl( &ctx.groestl, (char*)hash);

View File

@@ -19,10 +19,7 @@
#include "algo/skein/sse2/skein.c" #include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h" #include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#endif #endif
@@ -36,37 +33,36 @@
#define DATA_ALIGNXY(x,y) __declspec(align(y)) x #define DATA_ALIGNXY(x,y) __declspec(align(y)) x
#endif #endif
#ifndef NO_AES_NI #ifdef NO_AES_NI
hashState_groestl quark_groestl_ctx; sph_groestl512_context quark_ctx;
#else
hashState_groestl quark_ctx;
#endif #endif
void init_quark_ctx() void init_quark_ctx()
{ {
#ifndef NO_AES_NI #ifdef NO_AES_NI
init_groestl( &quark_groestl_ctx ); sph_groestl512_init( &quark_ctx );
#else
init_groestl( &quark_ctx );
#endif #endif
} }
inline static void quarkhash(void *state, const void *input) inline static void quarkhash(void *state, const void *input)
{ {
#ifdef NO_AES_NI
grsoState sts_grs;
#else
hashState_groestl ctx;
memcpy(&ctx, &quark_groestl_ctx, sizeof(quark_groestl_ctx));
#endif
/* shared temp space */
/* hash is really just 64bytes but it used to hold both hash and final round constants passed 64 */
unsigned char hashbuf[128]; unsigned char hashbuf[128];
size_t hashptr; size_t hashptr;
sph_u64 hashctA; sph_u64 hashctA;
sph_u64 hashctB; sph_u64 hashctB;
int i; int i;
unsigned char hash[128]; unsigned char hash[128];
#ifdef NO_AES_NI
sph_groestl512_context ctx;
#else
hashState_groestl ctx;
#endif
memcpy( &ctx, &quark_ctx, sizeof(ctx) );
// Blake // Blake
DECL_BLK; DECL_BLK;
@@ -117,13 +113,13 @@ inline static void quarkhash(void *state, const void *input)
{ {
#ifdef NO_AES_NI #ifdef NO_AES_NI
GRS_I; sph_groestl512_init( &ctx );
GRS_U; sph_groestl512 ( &ctx, hash, 64 );
GRS_C; sph_groestl512_close( &ctx, hash );
#else #else
reinit_groestl( &ctx ); reinit_groestl( &ctx );
update_groestl(&ctx, (char*)hash,512); update_groestl( &ctx, (char*)hash, 512 );
final_groestl(&ctx, (char*)hash); final_groestl( &ctx, (char*)hash );
#endif #endif
} while(0); continue; } while(0); continue;

View File

@@ -371,7 +371,6 @@ extern "C"{
#define DECL_SKN \ #define DECL_SKN \
sph_u64 sknh0, sknh1, sknh2, sknh3, sknh4, sknh5, sknh6, sknh7; \ sph_u64 sknh0, sknh1, sknh2, sknh3, sknh4, sknh5, sknh6, sknh7; \
unsigned char sknbuf[64]; \
#define sknREAD_STATE_BIG(sc) do { \ #define sknREAD_STATE_BIG(sc) do { \
sknh0 = (sc)->sknh0; \ sknh0 = (sc)->sknh0; \
@@ -424,7 +423,6 @@ do { \
do { \ do { \
unsigned char *buf; \ unsigned char *buf; \
size_t ptr; \ size_t ptr; \
unsigned first; \
size_t len = 64; \ size_t len = 64; \
const void *data = hash; \ const void *data = hash; \
buf = hashbuf; \ buf = hashbuf; \
@@ -441,7 +439,6 @@ do { \
unsigned char *buf; \ unsigned char *buf; \
size_t ptr; \ size_t ptr; \
unsigned et; \ unsigned et; \
int i; \
\ \
buf = hashbuf; \ buf = hashbuf; \
ptr = hashptr; \ ptr = hashptr; \

View File

@@ -18,10 +18,7 @@
#include "algo/simd/sph_simd.h" #include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h" #include "algo/echo/sph_echo.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
// #include "algo/echo/sph_echo.h"
// #include "algo/groestl/sph_groestl.h"
#else
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/aes_ni/hash_api.h"
#endif #endif

View File

@@ -17,10 +17,7 @@
#include "algo/simd/sph_simd.h" #include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h" #include "algo/echo/sph_echo.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/aes_ni/hash_api.h"
#endif #endif
@@ -40,7 +37,7 @@ typedef struct {
hashState_sd simd; hashState_sd simd;
sph_shavite512_context shavite; sph_shavite512_context shavite;
#ifdef NO_AES_NI #ifdef NO_AES_NI
// sph_groestl512_context groestl; sph_groestl512_context groestl;
sph_echo512_context echo; sph_echo512_context echo;
#else #else
hashState_echo echo; hashState_echo echo;
@@ -57,7 +54,7 @@ void init_x11_ctx()
sph_shavite512_init( &x11_ctx.shavite ); sph_shavite512_init( &x11_ctx.shavite );
init_sd( &x11_ctx.simd, 512 ); init_sd( &x11_ctx.simd, 512 );
#ifdef NO_AES_NI #ifdef NO_AES_NI
// sph_groestl512_init( &x11_ctx.groestl ); sph_groestl512_init( &x11_ctx.groestl );
sph_echo512_init( &x11_ctx.echo ); sph_echo512_init( &x11_ctx.echo );
#else #else
init_echo( &x11_ctx.echo, 512 ); init_echo( &x11_ctx.echo, 512 );
@@ -92,13 +89,8 @@ static void x11_hash( void *state, const void *input )
#undef dH #undef dH
#ifdef NO_AES_NI #ifdef NO_AES_NI
grsoState sts_grs; sph_groestl512 (&ctx.groestl, hash, 64);
GRS_I; sph_groestl512_close(&ctx.groestl, hash);
GRS_U;
GRS_C;
// sph_groestl512 (&ctx.groestl, hash, 64);
// sph_groestl512_close(&ctx.groestl, hash);
#else #else
update_groestl( &ctx.groestl, (char*)hash, 512 ); update_groestl( &ctx.groestl, (char*)hash, 512 );
final_groestl( &ctx.groestl, (char*)hash ); final_groestl( &ctx.groestl, (char*)hash );

View File

@@ -18,10 +18,7 @@
#include "algo/simd/sph_simd.h" #include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h" #include "algo/echo/sph_echo.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
// #include "algo/groestl/sse2/grso.h"
// #include "algo/groestl/sse2/grso-macro.c"
#else
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/aes_ni/hash_api.h"
#endif #endif

View File

@@ -6,6 +6,7 @@
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include "algo/groestl/sph_groestl.h"
#include "algo/gost/sph_gost.h" #include "algo/gost/sph_gost.h"
#include "algo/shavite/sph_shavite.h" #include "algo/shavite/sph_shavite.h"
#include "algo/echo/sph_echo.h" #include "algo/echo/sph_echo.h"
@@ -19,10 +20,7 @@
#include "algo/skein/sse2/skein.c" #include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h" #include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/aes_ni/hash_api.h"
#endif #endif
@@ -34,6 +32,7 @@ typedef struct {
cubehashParam cube; cubehashParam cube;
hashState_sd simd; hashState_sd simd;
#ifdef NO_AES_NI #ifdef NO_AES_NI
sph_groestl512_context groestl;
sph_echo512_context echo; sph_echo512_context echo;
#else #else
hashState_echo echo; hashState_echo echo;
@@ -51,6 +50,7 @@ void init_sib_ctx()
cubehashInit( &sib_ctx.cube, 512, 16, 32 ); cubehashInit( &sib_ctx.cube, 512, 16, 32 );
init_sd( &sib_ctx.simd, 512 ); init_sd( &sib_ctx.simd, 512 );
#ifdef NO_AES_NI #ifdef NO_AES_NI
sph_groestl512_init( &sib_ctx.groestl );
sph_echo512_init( &sib_ctx.echo ); sph_echo512_init( &sib_ctx.echo );
#else #else
init_echo( &sib_ctx.echo, 512 ); init_echo( &sib_ctx.echo, 512 );
@@ -59,17 +59,12 @@ void init_sib_ctx()
} }
void sibhash(void *output, const void *input) void sibhash(void *output, const void *input)
{ {
unsigned char hash[128]; // uint32_t hashA[16], hashB[16]; unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
#define hashA hash #define hashA hash
#define hashB hash+64 #define hashB hash+64
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
size_t hashptr; size_t hashptr;
unsigned char hashbuf[128]; unsigned char hashbuf[128];
sph_u64 hashctA; sph_u64 hashctA;
@@ -95,12 +90,11 @@ void sibhash(void *output, const void *input)
#undef dH #undef dH
#ifdef NO_AES_NI #ifdef NO_AES_NI
GRS_I; sph_groestl512 (&ctx.groestl, hash, 64);
GRS_U; sph_groestl512_close(&ctx.groestl, hash);
GRS_C;
#else #else
update_groestl( &ctx.groestl, (char*)hash,512); update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash); final_groestl( &ctx.groestl, (char*)hash);
#endif #endif
DECL_SKN; DECL_SKN;

View File

@@ -29,10 +29,7 @@
#include "algo/skein/sse2/skein.c" #include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h" #include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/aes_ni/hash_api.h"
#endif #endif
@@ -79,9 +76,6 @@ static void x13hash(void *output, const void *input)
x13_ctx_holder ctx; x13_ctx_holder ctx;
memcpy( &ctx, &x13_ctx, sizeof(x13_ctx) ); memcpy( &ctx, &x13_ctx, sizeof(x13_ctx) );
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
// X11 algos // X11 algos
@@ -116,12 +110,8 @@ static void x13hash(void *output, const void *input)
//---groetl---- //---groetl----
#ifdef NO_AES_NI #ifdef NO_AES_NI
// use GRS if possible sph_groestl512 (&ctx.groestl, hash, 64);
GRS_I; sph_groestl512_close(&ctx.groestl, hash);
GRS_U;
GRS_C;
// sph_groestl512 (&ctx.groestl, hash, 64);
// sph_groestl512_close(&ctx.groestl, hash);
#else #else
update_groestl( &ctx.groestl, (char*)hash,512); update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash); final_groestl( &ctx.groestl, (char*)hash);

View File

@@ -31,10 +31,7 @@
#include "algo/skein/sse2/skein.c" #include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h" #include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/aes_ni/hash_api.h"
#endif #endif
@@ -84,10 +81,6 @@ static void x14hash(void *output, const void *input)
x14_ctx_holder ctx; x14_ctx_holder ctx;
memcpy(&ctx, &x14_ctx, sizeof(x14_ctx)); memcpy(&ctx, &x14_ctx, sizeof(x14_ctx));
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
unsigned char hashbuf[128]; unsigned char hashbuf[128];
size_t hashptr; size_t hashptr;
sph_u64 hashctA; sph_u64 hashctA;
@@ -119,12 +112,8 @@ static void x14hash(void *output, const void *input)
//---groestl---- //---groestl----
#ifdef NO_AES_NI #ifdef NO_AES_NI
// use SSE2 optimized GRS if possible sph_groestl512 (&ctx.groestl, hash, 64);
GRS_I; sph_groestl512_close(&ctx.groestl, hash);
GRS_U;
GRS_C;
// sph_groestl512 (&ctx.groestl, hash, 64);
// sph_groestl512_close(&ctx.groestl, hash);
#else #else
update_groestl( &ctx.groestl, (char*)hash,512); update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash); final_groestl( &ctx.groestl, (char*)hash);

View File

@@ -31,10 +31,7 @@
#include "algo/skein/sse2/skein.c" #include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h" #include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#endif #endif
@@ -86,10 +83,6 @@ static void x15hash(void *output, const void *input)
x15_ctx_holder ctx; x15_ctx_holder ctx;
memcpy( &ctx, &x15_ctx, sizeof(x15_ctx) ); memcpy( &ctx, &x15_ctx, sizeof(x15_ctx) );
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
unsigned char hashbuf[128]; unsigned char hashbuf[128];
size_t hashptr; size_t hashptr;
sph_u64 hashctA; sph_u64 hashctA;
@@ -120,14 +113,11 @@ static void x15hash(void *output, const void *input)
//---groestl---- //---groestl----
#ifdef NO_AES_NI #ifdef NO_AES_NI
GRS_I; sph_groestl512(&ctx.groestl, hash, 64);
GRS_U; sph_groestl512_close(&ctx.groestl, hash);
GRS_C;
// sph_groestl512(&ctx.groestl, hash, 64);
// sph_groestl512_close(&ctx.groestl, hash);
#else #else
update_groestl( &ctx.groestl, (char*)hash,512); update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash); final_groestl( &ctx.groestl, (char*)hash);
#endif #endif
//---skein4--- //---skein4---

View File

@@ -33,10 +33,7 @@
#include "algo/skein/sse2/skein.c" #include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h" #include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#endif #endif
@@ -92,10 +89,6 @@ static void x17hash(void *output, const void *input)
x17_ctx_holder ctx; x17_ctx_holder ctx;
memcpy( &ctx, &x17_ctx, sizeof(x17_ctx) ); memcpy( &ctx, &x17_ctx, sizeof(x17_ctx) );
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
unsigned char hashbuf[128]; unsigned char hashbuf[128];
size_t hashptr; size_t hashptr;
sph_u64 hashctA; sph_u64 hashctA;
@@ -126,14 +119,11 @@ static void x17hash(void *output, const void *input)
//---groestl---- //---groestl----
#ifdef NO_AES_NI #ifdef NO_AES_NI
// GRS_I;
// GRS_U;
// GRS_C;
sph_groestl512(&ctx.groestl, hash, 64); sph_groestl512(&ctx.groestl, hash, 64);
sph_groestl512_close(&ctx.groestl, hash); sph_groestl512_close(&ctx.groestl, hash);
#else #else
update_groestl( &ctx.groestl, (char*)hash,512); update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash); final_groestl( &ctx.groestl, (char*)hash);
#endif #endif
//---skein4--- //---skein4---

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,821 +0,0 @@
/*
* Copyright 2011-2012 pooler@litecoinpool.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "cpuminer-config.h"
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#if defined(__i386__)
.macro scrypt_shuffle src, so, dest, do
movl \so+60(\src), %eax
movl \so+44(\src), %ebx
movl \so+28(\src), %ecx
movl \so+12(\src), %edx
movl %eax, \do+12(\dest)
movl %ebx, \do+28(\dest)
movl %ecx, \do+44(\dest)
movl %edx, \do+60(\dest)
movl \so+40(\src), %eax
movl \so+8(\src), %ebx
movl \so+48(\src), %ecx
movl \so+16(\src), %edx
movl %eax, \do+8(\dest)
movl %ebx, \do+40(\dest)
movl %ecx, \do+16(\dest)
movl %edx, \do+48(\dest)
movl \so+20(\src), %eax
movl \so+4(\src), %ebx
movl \so+52(\src), %ecx
movl \so+36(\src), %edx
movl %eax, \do+4(\dest)
movl %ebx, \do+20(\dest)
movl %ecx, \do+36(\dest)
movl %edx, \do+52(\dest)
movl \so+0(\src), %eax
movl \so+24(\src), %ebx
movl \so+32(\src), %ecx
movl \so+56(\src), %edx
movl %eax, \do+0(\dest)
movl %ebx, \do+24(\dest)
movl %ecx, \do+32(\dest)
movl %edx, \do+56(\dest)
.endm
.macro salsa8_core_gen_quadround
movl 52(%esp), %ecx
movl 4(%esp), %edx
movl 20(%esp), %ebx
movl 8(%esp), %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 4(%esp)
movl 36(%esp), %edi
leal (%edx, %ebx), %ebp
roll $9, %ebp
xorl %ebp, %edi
movl 24(%esp), %ebp
movl %edi, 8(%esp)
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 40(%esp), %ebx
movl %ecx, 20(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 24(%esp)
movl 56(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 36(%esp)
movl 28(%esp), %ecx
movl %edx, 28(%esp)
movl 44(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 60(%esp), %ebx
movl %esi, 40(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 44(%esp)
movl 12(%esp), %edi
xorl %esi, %ebp
leal (%edx, %ebx), %esi
roll $9, %esi
xorl %esi, %edi
movl %edi, 12(%esp)
movl 48(%esp), %esi
movl %ebp, 48(%esp)
movl 64(%esp), %ebp
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl 32(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 32(%esp)
movl %ebx, %ecx
movl %edx, 52(%esp)
movl 28(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 40(%esp), %ebx
movl %esi, 28(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 40(%esp)
movl 12(%esp), %edi
xorl %esi, %ebp
leal (%edx, %ebx), %esi
roll $9, %esi
xorl %esi, %edi
movl %edi, 12(%esp)
movl 4(%esp), %esi
movl %ebp, 4(%esp)
movl 48(%esp), %ebp
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 48(%esp)
movl 32(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 32(%esp)
movl 24(%esp), %ecx
movl %edx, 24(%esp)
movl 52(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 28(%esp), %ebx
movl %esi, 28(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 52(%esp)
movl 8(%esp), %edi
xorl %esi, %ebp
leal (%edx, %ebx), %esi
roll $9, %esi
xorl %esi, %edi
movl %edi, 8(%esp)
movl 44(%esp), %esi
movl %ebp, 44(%esp)
movl 4(%esp), %ebp
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 20(%esp), %ebx
movl %ecx, 4(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl 36(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 20(%esp)
movl %ebx, %ecx
movl %edx, 36(%esp)
movl 24(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 28(%esp), %ebx
movl %esi, 24(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 28(%esp)
xorl %esi, %ebp
movl 8(%esp), %esi
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl 40(%esp), %edi
movl %ebp, 8(%esp)
movl 44(%esp), %ebp
movl %esi, 40(%esp)
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 4(%esp), %ebx
movl %ecx, 44(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 4(%esp)
movl 20(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 56(%esp)
movl 48(%esp), %ecx
movl %edx, 20(%esp)
movl 36(%esp), %edx
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %edi
movl 24(%esp), %ebx
movl %edi, 24(%esp)
addl %esi, %edi
roll $18, %edi
leal (%ecx, %edx), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 60(%esp)
movl 12(%esp), %esi
xorl %edi, %ebp
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl %esi, 12(%esp)
movl 52(%esp), %edi
movl %ebp, 36(%esp)
movl 8(%esp), %ebp
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl 32(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 32(%esp)
movl %ebx, %ecx
movl %edx, 48(%esp)
movl 20(%esp), %edx
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %edi
movl 24(%esp), %ebx
movl %edi, 20(%esp)
addl %esi, %edi
roll $18, %edi
leal (%ecx, %edx), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 8(%esp)
movl 12(%esp), %esi
xorl %edi, %ebp
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl %esi, 12(%esp)
movl 28(%esp), %edi
movl %ebp, 52(%esp)
movl 36(%esp), %ebp
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 28(%esp)
movl 32(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 32(%esp)
movl 4(%esp), %ecx
movl %edx, 4(%esp)
movl 48(%esp), %edx
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %edi
movl 20(%esp), %ebx
movl %edi, 20(%esp)
addl %esi, %edi
roll $18, %edi
leal (%ecx, %edx), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 48(%esp)
movl 40(%esp), %esi
xorl %edi, %ebp
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl %esi, 36(%esp)
movl 60(%esp), %edi
movl %ebp, 24(%esp)
movl 52(%esp), %ebp
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 44(%esp), %ebx
movl %ecx, 40(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 52(%esp)
movl 56(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 56(%esp)
addl %esi, %ebx
movl %edx, 44(%esp)
roll $13, %ebx
xorl %ebx, %edi
movl %edi, 60(%esp)
addl %esi, %edi
roll $18, %edi
xorl %edi, %ebp
movl %ebp, 64(%esp)
.endm
.text
.p2align 5
salsa8_core_gen:
salsa8_core_gen_quadround
salsa8_core_gen_quadround
ret
.text
.p2align 5
.globl scrypt_core
.globl _scrypt_core
scrypt_core:
_scrypt_core:
pushl %ebx
pushl %ebp
pushl %edi
pushl %esi
/* Check for SSE2 availability */
movl $1, %eax
cpuid
andl $0x04000000, %edx
jnz scrypt_core_sse2
scrypt_core_gen:
movl 20(%esp), %edi
movl 24(%esp), %esi
subl $72, %esp
.macro scrypt_core_macro1a p, q
movl \p(%edi), %eax
movl \q(%edi), %edx
movl %eax, \p(%esi)
movl %edx, \q(%esi)
xorl %edx, %eax
movl %eax, \p(%edi)
movl %eax, \p(%esp)
.endm
.macro scrypt_core_macro1b p, q
movl \p(%edi), %eax
xorl \p(%esi, %edx), %eax
movl \q(%edi), %ebx
xorl \q(%esi, %edx), %ebx
movl %ebx, \q(%edi)
xorl %ebx, %eax
movl %eax, \p(%edi)
movl %eax, \p(%esp)
.endm
.macro scrypt_core_macro2 p, q
movl \p(%esp), %eax
addl \p(%edi), %eax
movl %eax, \p(%edi)
xorl \q(%edi), %eax
movl %eax, \q(%edi)
movl %eax, \p(%esp)
.endm
.macro scrypt_core_macro3 p, q
movl \p(%esp), %eax
addl \q(%edi), %eax
movl %eax, \q(%edi)
.endm
leal 131072(%esi), %ecx
scrypt_core_gen_loop1:
movl %esi, 64(%esp)
movl %ecx, 68(%esp)
scrypt_core_macro1a 0, 64
scrypt_core_macro1a 4, 68
scrypt_core_macro1a 8, 72
scrypt_core_macro1a 12, 76
scrypt_core_macro1a 16, 80
scrypt_core_macro1a 20, 84
scrypt_core_macro1a 24, 88
scrypt_core_macro1a 28, 92
scrypt_core_macro1a 32, 96
scrypt_core_macro1a 36, 100
scrypt_core_macro1a 40, 104
scrypt_core_macro1a 44, 108
scrypt_core_macro1a 48, 112
scrypt_core_macro1a 52, 116
scrypt_core_macro1a 56, 120
scrypt_core_macro1a 60, 124
call salsa8_core_gen
movl 92(%esp), %edi
scrypt_core_macro2 0, 64
scrypt_core_macro2 4, 68
scrypt_core_macro2 8, 72
scrypt_core_macro2 12, 76
scrypt_core_macro2 16, 80
scrypt_core_macro2 20, 84
scrypt_core_macro2 24, 88
scrypt_core_macro2 28, 92
scrypt_core_macro2 32, 96
scrypt_core_macro2 36, 100
scrypt_core_macro2 40, 104
scrypt_core_macro2 44, 108
scrypt_core_macro2 48, 112
scrypt_core_macro2 52, 116
scrypt_core_macro2 56, 120
scrypt_core_macro2 60, 124
call salsa8_core_gen
movl 92(%esp), %edi
scrypt_core_macro3 0, 64
scrypt_core_macro3 4, 68
scrypt_core_macro3 8, 72
scrypt_core_macro3 12, 76
scrypt_core_macro3 16, 80
scrypt_core_macro3 20, 84
scrypt_core_macro3 24, 88
scrypt_core_macro3 28, 92
scrypt_core_macro3 32, 96
scrypt_core_macro3 36, 100
scrypt_core_macro3 40, 104
scrypt_core_macro3 44, 108
scrypt_core_macro3 48, 112
scrypt_core_macro3 52, 116
scrypt_core_macro3 56, 120
scrypt_core_macro3 60, 124
movl 64(%esp), %esi
movl 68(%esp), %ecx
addl $128, %esi
cmpl %ecx, %esi
jne scrypt_core_gen_loop1
movl 96(%esp), %esi
movl $1024, %ecx
scrypt_core_gen_loop2:
movl %ecx, 68(%esp)
movl 64(%edi), %edx
andl $1023, %edx
shll $7, %edx
scrypt_core_macro1b 0, 64
scrypt_core_macro1b 4, 68
scrypt_core_macro1b 8, 72
scrypt_core_macro1b 12, 76
scrypt_core_macro1b 16, 80
scrypt_core_macro1b 20, 84
scrypt_core_macro1b 24, 88
scrypt_core_macro1b 28, 92
scrypt_core_macro1b 32, 96
scrypt_core_macro1b 36, 100
scrypt_core_macro1b 40, 104
scrypt_core_macro1b 44, 108
scrypt_core_macro1b 48, 112
scrypt_core_macro1b 52, 116
scrypt_core_macro1b 56, 120
scrypt_core_macro1b 60, 124
call salsa8_core_gen
movl 92(%esp), %edi
scrypt_core_macro2 0, 64
scrypt_core_macro2 4, 68
scrypt_core_macro2 8, 72
scrypt_core_macro2 12, 76
scrypt_core_macro2 16, 80
scrypt_core_macro2 20, 84
scrypt_core_macro2 24, 88
scrypt_core_macro2 28, 92
scrypt_core_macro2 32, 96
scrypt_core_macro2 36, 100
scrypt_core_macro2 40, 104
scrypt_core_macro2 44, 108
scrypt_core_macro2 48, 112
scrypt_core_macro2 52, 116
scrypt_core_macro2 56, 120
scrypt_core_macro2 60, 124
call salsa8_core_gen
movl 92(%esp), %edi
movl 96(%esp), %esi
scrypt_core_macro3 0, 64
scrypt_core_macro3 4, 68
scrypt_core_macro3 8, 72
scrypt_core_macro3 12, 76
scrypt_core_macro3 16, 80
scrypt_core_macro3 20, 84
scrypt_core_macro3 24, 88
scrypt_core_macro3 28, 92
scrypt_core_macro3 32, 96
scrypt_core_macro3 36, 100
scrypt_core_macro3 40, 104
scrypt_core_macro3 44, 108
scrypt_core_macro3 48, 112
scrypt_core_macro3 52, 116
scrypt_core_macro3 56, 120
scrypt_core_macro3 60, 124
movl 68(%esp), %ecx
subl $1, %ecx
ja scrypt_core_gen_loop2
addl $72, %esp
popl %esi
popl %edi
popl %ebp
popl %ebx
ret
.macro salsa8_core_sse2_doubleround
movdqa %xmm1, %xmm4
paddd %xmm0, %xmm4
movdqa %xmm4, %xmm5
pslld $7, %xmm4
psrld $25, %xmm5
pxor %xmm4, %xmm3
movdqa %xmm0, %xmm4
pxor %xmm5, %xmm3
paddd %xmm3, %xmm4
movdqa %xmm4, %xmm5
pslld $9, %xmm4
psrld $23, %xmm5
pxor %xmm4, %xmm2
movdqa %xmm3, %xmm4
pxor %xmm5, %xmm2
pshufd $0x93, %xmm3, %xmm3
paddd %xmm2, %xmm4
movdqa %xmm4, %xmm5
pslld $13, %xmm4
psrld $19, %xmm5
pxor %xmm4, %xmm1
movdqa %xmm2, %xmm4
pxor %xmm5, %xmm1
pshufd $0x4e, %xmm2, %xmm2
paddd %xmm1, %xmm4
movdqa %xmm4, %xmm5
pslld $18, %xmm4
psrld $14, %xmm5
pxor %xmm4, %xmm0
movdqa %xmm3, %xmm4
pxor %xmm5, %xmm0
pshufd $0x39, %xmm1, %xmm1
paddd %xmm0, %xmm4
movdqa %xmm4, %xmm5
pslld $7, %xmm4
psrld $25, %xmm5
pxor %xmm4, %xmm1
movdqa %xmm0, %xmm4
pxor %xmm5, %xmm1
paddd %xmm1, %xmm4
movdqa %xmm4, %xmm5
pslld $9, %xmm4
psrld $23, %xmm5
pxor %xmm4, %xmm2
movdqa %xmm1, %xmm4
pxor %xmm5, %xmm2
pshufd $0x93, %xmm1, %xmm1
paddd %xmm2, %xmm4
movdqa %xmm4, %xmm5
pslld $13, %xmm4
psrld $19, %xmm5
pxor %xmm4, %xmm3
movdqa %xmm2, %xmm4
pxor %xmm5, %xmm3
pshufd $0x4e, %xmm2, %xmm2
paddd %xmm3, %xmm4
movdqa %xmm4, %xmm5
pslld $18, %xmm4
psrld $14, %xmm5
pxor %xmm4, %xmm0
pshufd $0x39, %xmm3, %xmm3
pxor %xmm5, %xmm0
.endm
.macro salsa8_core_sse2
salsa8_core_sse2_doubleround
salsa8_core_sse2_doubleround
salsa8_core_sse2_doubleround
salsa8_core_sse2_doubleround
.endm
.p2align 5
scrypt_core_sse2:
movl 20(%esp), %edi
movl 24(%esp), %esi
movl %esp, %ebp
subl $128, %esp
andl $-16, %esp
scrypt_shuffle %edi, 0, %esp, 0
scrypt_shuffle %edi, 64, %esp, 64
movdqa 96(%esp), %xmm6
movdqa 112(%esp), %xmm7
movl %esi, %edx
leal 131072(%esi), %ecx
scrypt_core_sse2_loop1:
movdqa 0(%esp), %xmm0
movdqa 16(%esp), %xmm1
movdqa 32(%esp), %xmm2
movdqa 48(%esp), %xmm3
movdqa 64(%esp), %xmm4
movdqa 80(%esp), %xmm5
pxor %xmm4, %xmm0
pxor %xmm5, %xmm1
movdqa %xmm0, 0(%edx)
movdqa %xmm1, 16(%edx)
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm2, 32(%edx)
movdqa %xmm3, 48(%edx)
movdqa %xmm4, 64(%edx)
movdqa %xmm5, 80(%edx)
movdqa %xmm6, 96(%edx)
movdqa %xmm7, 112(%edx)
salsa8_core_sse2
paddd 0(%edx), %xmm0
paddd 16(%edx), %xmm1
paddd 32(%edx), %xmm2
paddd 48(%edx), %xmm3
movdqa %xmm0, 0(%esp)
movdqa %xmm1, 16(%esp)
movdqa %xmm2, 32(%esp)
movdqa %xmm3, 48(%esp)
pxor 64(%esp), %xmm0
pxor 80(%esp), %xmm1
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
movdqa %xmm2, %xmm6
movdqa %xmm3, %xmm7
salsa8_core_sse2
paddd 64(%esp), %xmm0
paddd 80(%esp), %xmm1
paddd %xmm2, %xmm6
paddd %xmm3, %xmm7
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
addl $128, %edx
cmpl %ecx, %edx
jne scrypt_core_sse2_loop1
movdqa 64(%esp), %xmm4
movdqa 80(%esp), %xmm5
movl $1024, %ecx
scrypt_core_sse2_loop2:
movd %xmm4, %edx
movdqa 0(%esp), %xmm0
movdqa 16(%esp), %xmm1
movdqa 32(%esp), %xmm2
movdqa 48(%esp), %xmm3
andl $1023, %edx
shll $7, %edx
pxor 0(%esi, %edx), %xmm0
pxor 16(%esi, %edx), %xmm1
pxor 32(%esi, %edx), %xmm2
pxor 48(%esi, %edx), %xmm3
pxor %xmm4, %xmm0
pxor %xmm5, %xmm1
movdqa %xmm0, 0(%esp)
movdqa %xmm1, 16(%esp)
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm2, 32(%esp)
movdqa %xmm3, 48(%esp)
salsa8_core_sse2
paddd 0(%esp), %xmm0
paddd 16(%esp), %xmm1
paddd 32(%esp), %xmm2
paddd 48(%esp), %xmm3
movdqa %xmm0, 0(%esp)
movdqa %xmm1, 16(%esp)
movdqa %xmm2, 32(%esp)
movdqa %xmm3, 48(%esp)
pxor 64(%esi, %edx), %xmm0
pxor 80(%esi, %edx), %xmm1
pxor 96(%esi, %edx), %xmm2
pxor 112(%esi, %edx), %xmm3
pxor 64(%esp), %xmm0
pxor 80(%esp), %xmm1
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
movdqa %xmm2, %xmm6
movdqa %xmm3, %xmm7
salsa8_core_sse2
paddd 64(%esp), %xmm0
paddd 80(%esp), %xmm1
paddd %xmm2, %xmm6
paddd %xmm3, %xmm7
movdqa %xmm0, %xmm4
movdqa %xmm1, %xmm5
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
subl $1, %ecx
ja scrypt_core_sse2_loop2
movdqa %xmm6, 96(%esp)
movdqa %xmm7, 112(%esp)
scrypt_shuffle %esp, 0, %edi, 0
scrypt_shuffle %esp, 64, %edi, 64
movl %ebp, %esp
popl %esi
popl %edi
popl %ebp
popl %ebx
ret
#endif

View File

@@ -1,767 +0,0 @@
/*
* Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2013 pooler
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#include "../cpuminer-config.h"
#include "../miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
static const uint32_t keypad[12] = {
0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000280
};
static const uint32_t innerpad[11] = {
0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x000004a0
};
static const uint32_t outerpad[8] = {
0x80000000, 0, 0, 0, 0, 0, 0, 0x00000300
};
static const uint32_t finalblk[16] = {
0x00000001, 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000620
};
static inline void HMAC_SHA256_80_init(const uint32_t *key,
uint32_t *tstate, uint32_t *ostate)
{
uint32_t ihash[8];
uint32_t pad[16];
int i;
/* tstate is assumed to contain the midstate of key */
memcpy(pad, key + 16, 16);
memcpy(pad + 4, keypad, 48);
sha256_transform(tstate, pad, 0);
memcpy(ihash, tstate, 32);
sha256_init(ostate);
for (i = 0; i < 8; i++)
pad[i] = ihash[i] ^ 0x5c5c5c5c;
for (; i < 16; i++)
pad[i] = 0x5c5c5c5c;
sha256_transform(ostate, pad, 0);
sha256_init(tstate);
for (i = 0; i < 8; i++)
pad[i] = ihash[i] ^ 0x36363636;
for (; i < 16; i++)
pad[i] = 0x36363636;
sha256_transform(tstate, pad, 0);
}
static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
uint32_t istate[8], ostate2[8];
uint32_t ibuf[16], obuf[16];
int i, j;
memcpy(istate, tstate, 32);
sha256_transform(istate, salt, 0);
memcpy(ibuf, salt + 16, 16);
memcpy(ibuf + 5, innerpad, 44);
memcpy(obuf + 8, outerpad, 32);
for (i = 0; i < 4; i++) {
memcpy(obuf, istate, 32);
ibuf[4] = i + 1;
sha256_transform(obuf, ibuf, 0);
memcpy(ostate2, ostate, 32);
sha256_transform(ostate2, obuf, 0);
for (j = 0; j < 8; j++)
output[8 * i + j] = swab32(ostate2[j]);
}
}
static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
const uint32_t *salt, uint32_t *output)
{
uint32_t buf[16];
int i;
sha256_transform(tstate, salt, 1);
sha256_transform(tstate, salt + 16, 1);
sha256_transform(tstate, finalblk, 0);
memcpy(buf, tstate, 32);
memcpy(buf + 8, outerpad, 32);
sha256_transform(ostate, buf, 0);
for (i = 0; i < 8; i++)
output[i] = swab32(ostate[i]);
}
#ifdef HAVE_SHA256_4WAY
static const uint32_t keypad_4way[4 * 12] = {
0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000280, 0x00000280, 0x00000280, 0x00000280
};
static const uint32_t innerpad_4way[4 * 11] = {
0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x000004a0, 0x000004a0, 0x000004a0, 0x000004a0
};
static const uint32_t outerpad_4way[4 * 8] = {
0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000300, 0x00000300, 0x00000300, 0x00000300
};
static const uint32_t finalblk_4way[4 * 16] __attribute__((aligned(16))) = {
0x00000001, 0x00000001, 0x00000001, 0x00000001,
0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000620, 0x00000620, 0x00000620, 0x00000620
};
static inline void HMAC_SHA256_80_init_4way(const uint32_t *key,
uint32_t *tstate, uint32_t *ostate)
{
uint32_t ihash[4 * 8] __attribute__((aligned(16)));
uint32_t pad[4 * 16] __attribute__((aligned(16)));
int i;
/* tstate is assumed to contain the midstate of key */
memcpy(pad, key + 4 * 16, 4 * 16);
memcpy(pad + 4 * 4, keypad_4way, 4 * 48);
sha256_transform_4way(tstate, pad, 0);
memcpy(ihash, tstate, 4 * 32);
sha256_init_4way(ostate);
for (i = 0; i < 4 * 8; i++)
pad[i] = ihash[i] ^ 0x5c5c5c5c;
for (; i < 4 * 16; i++)
pad[i] = 0x5c5c5c5c;
sha256_transform_4way(ostate, pad, 0);
sha256_init_4way(tstate);
for (i = 0; i < 4 * 8; i++)
pad[i] = ihash[i] ^ 0x36363636;
for (; i < 4 * 16; i++)
pad[i] = 0x36363636;
sha256_transform_4way(tstate, pad, 0);
}
static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
uint32_t istate[4 * 8] __attribute__((aligned(16)));
uint32_t ostate2[4 * 8] __attribute__((aligned(16)));
uint32_t ibuf[4 * 16] __attribute__((aligned(16)));
uint32_t obuf[4 * 16] __attribute__((aligned(16)));
int i, j;
memcpy(istate, tstate, 4 * 32);
sha256_transform_4way(istate, salt, 0);
memcpy(ibuf, salt + 4 * 16, 4 * 16);
memcpy(ibuf + 4 * 5, innerpad_4way, 4 * 44);
memcpy(obuf + 4 * 8, outerpad_4way, 4 * 32);
for (i = 0; i < 4; i++) {
memcpy(obuf, istate, 4 * 32);
ibuf[4 * 4 + 0] = i + 1;
ibuf[4 * 4 + 1] = i + 1;
ibuf[4 * 4 + 2] = i + 1;
ibuf[4 * 4 + 3] = i + 1;
sha256_transform_4way(obuf, ibuf, 0);
memcpy(ostate2, ostate, 4 * 32);
sha256_transform_4way(ostate2, obuf, 0);
for (j = 0; j < 4 * 8; j++)
output[4 * 8 * i + j] = swab32(ostate2[j]);
}
}
static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
uint32_t buf[4 * 16] __attribute__((aligned(16)));
int i;
sha256_transform_4way(tstate, salt, 1);
sha256_transform_4way(tstate, salt + 4 * 16, 1);
sha256_transform_4way(tstate, finalblk_4way, 0);
memcpy(buf, tstate, 4 * 32);
memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);
sha256_transform_4way(ostate, buf, 0);
for (i = 0; i < 4 * 8; i++)
output[i] = swab32(ostate[i]);
}
#endif /* HAVE_SHA256_4WAY */
#ifdef HAVE_SHA256_8WAY
/* Final 64-byte SHA-256 block for the inner hash of PBKDF2_SHA256_128_32,
 * replicated across 8 interleaved lanes: big-endian block index 1, the
 * 0x80 padding bit, zero fill, and the total message length 0x620 bits
 * (64-byte ipad + 128-byte salt + 4-byte index = 196 bytes). */
static const uint32_t finalblk_8way[8 * 16] __attribute__((aligned(32))) = {
0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620
};
/* HMAC-SHA256 key setup for an 80-byte key, 8-way interleaved.
 * On return, tstate holds the inner-pad (0x36) midstate and ostate the
 * outer-pad (0x5c) midstate; both are later consumed by the PBKDF2
 * routines.  Pad constants are built inline rather than from tables. */
static inline void HMAC_SHA256_80_init_8way(const uint32_t *key,
uint32_t *tstate, uint32_t *ostate)
{
uint32_t ihash[8 * 8] __attribute__((aligned(32)));
uint32_t pad[8 * 16] __attribute__((aligned(32)));
int i;
/* tstate is assumed to contain the midstate of key */
/* Final block of the key hash: last 16 key bytes, padding bit, and
 * the length 0x280 bits (80 bytes). */
memcpy(pad, key + 8 * 16, 8 * 16);
for (i = 0; i < 8; i++)
pad[8 * 4 + i] = 0x80000000;
memset(pad + 8 * 5, 0x00, 8 * 40);
for (i = 0; i < 8; i++)
pad[8 * 15 + i] = 0x00000280;
sha256_transform_8way(tstate, pad, 0);
memcpy(ihash, tstate, 8 * 32);
/* Outer midstate: H(ihash ^ opad ...). */
sha256_init_8way(ostate);
for (i = 0; i < 8 * 8; i++)
pad[i] = ihash[i] ^ 0x5c5c5c5c;
for (; i < 8 * 16; i++)
pad[i] = 0x5c5c5c5c;
sha256_transform_8way(ostate, pad, 0);
/* Inner midstate: H(ihash ^ ipad ...). */
sha256_init_8way(tstate);
for (i = 0; i < 8 * 8; i++)
pad[i] = ihash[i] ^ 0x36363636;
for (; i < 8 * 16; i++)
pad[i] = 0x36363636;
sha256_transform_8way(tstate, pad, 0);
}
/* PBKDF2-HMAC-SHA256 first stage, 8-way interleaved counterpart of
 * PBKDF2_SHA256_80_128_4way; inner/outer padding is built inline
 * instead of coming from the 4-way constant tables. */
static inline void PBKDF2_SHA256_80_128_8way(const uint32_t *tstate,
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
uint32_t istate[8 * 8] __attribute__((aligned(32)));
uint32_t ostate2[8 * 8] __attribute__((aligned(32)));
uint32_t ibuf[8 * 16] __attribute__((aligned(32)));
uint32_t obuf[8 * 16] __attribute__((aligned(32)));
int i, j;
memcpy(istate, tstate, 8 * 32);
sha256_transform_8way(istate, salt, 0);
/* Inner final block: last 16 salt bytes, index slot, padding bit,
 * and length 0x4a0 bits (64-byte ipad + 80-byte salt + 4-byte index). */
memcpy(ibuf, salt + 8 * 16, 8 * 16);
for (i = 0; i < 8; i++)
ibuf[8 * 5 + i] = 0x80000000;
memset(ibuf + 8 * 6, 0x00, 8 * 36);
for (i = 0; i < 8; i++)
ibuf[8 * 15 + i] = 0x000004a0;
/* Outer final block padding: 32-byte digest, then length 0x300 bits. */
for (i = 0; i < 8; i++)
obuf[8 * 8 + i] = 0x80000000;
memset(obuf + 8 * 9, 0x00, 8 * 24);
for (i = 0; i < 8; i++)
obuf[8 * 15 + i] = 0x00000300;
for (i = 0; i < 4; i++) {
memcpy(obuf, istate, 8 * 32);
/* Big-endian PBKDF2 block index (i + 1) in every lane. */
ibuf[8 * 4 + 0] = i + 1;
ibuf[8 * 4 + 1] = i + 1;
ibuf[8 * 4 + 2] = i + 1;
ibuf[8 * 4 + 3] = i + 1;
ibuf[8 * 4 + 4] = i + 1;
ibuf[8 * 4 + 5] = i + 1;
ibuf[8 * 4 + 6] = i + 1;
ibuf[8 * 4 + 7] = i + 1;
sha256_transform_8way(obuf, ibuf, 0);
memcpy(ostate2, ostate, 8 * 32);
sha256_transform_8way(ostate2, obuf, 0);
for (j = 0; j < 8 * 8; j++)
output[8 * 8 * i + j] = swab32(ostate2[j]);
}
}
/* PBKDF2-HMAC-SHA256 final stage, 8-way: see the 4-way variant for the
 * structure; outer padding is built inline.  tstate/ostate are clobbered. */
static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
uint32_t buf[8 * 16] __attribute__((aligned(32)));
int i;
sha256_transform_8way(tstate, salt, 1);
sha256_transform_8way(tstate, salt + 8 * 16, 1);
sha256_transform_8way(tstate, finalblk_8way, 0);
memcpy(buf, tstate, 8 * 32);
/* Outer final block: digest, padding bit, length 0x300 bits (96 bytes). */
for (i = 0; i < 8; i++)
buf[8 * 8 + i] = 0x80000000;
memset(buf + 8 * 9, 0x00, 8 * 24);
for (i = 0; i < 8; i++)
buf[8 * 15 + i] = 0x00000300;
sha256_transform_8way(ostate, buf, 0);
for (i = 0; i < 8 * 8; i++)
output[i] = swab32(ostate[i]);
}
#endif /* HAVE_SHA256_8WAY */
#if defined(__x86_64__)
#define SCRYPT_MAX_WAYS 12
#define HAVE_SCRYPT_3WAY 1
int scrypt_best_throughput();
void scrypt_core(uint32_t *X, uint32_t *V);
void scrypt_core_3way(uint32_t *X, uint32_t *V);
#if defined(USE_AVX2)
#undef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 24
#define HAVE_SCRYPT_6WAY 1
void scrypt_core_6way(uint32_t *X, uint32_t *V);
#endif
#elif defined(__i386__)
#define SCRYPT_MAX_WAYS 4
#define scrypt_best_throughput() 1
void scrypt_core(uint32_t *X, uint32_t *V);
#elif defined(__arm__) && defined(__APCS_32__)
void scrypt_core(uint32_t *X, uint32_t *V);
#if defined(__ARM_NEON__)
#undef HAVE_SHA256_4WAY
#define SCRYPT_MAX_WAYS 3
#define HAVE_SCRYPT_3WAY 1
#define scrypt_best_throughput() 3
void scrypt_core_3way(uint32_t *X, uint32_t *V);
#endif
#else
/* Salsa20/8 core step used by scrypt's BlockMix: B ^= Bx, then
 * B += Salsa20/8(B).  Behaviourally identical to the fully unrolled
 * original; the sixteen working words live in a local array instead of
 * sixteen separate scalars. */
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
	uint32_t x[16];
	int r;

	for (r = 0; r < 16; r++)
		x[r] = (B[r] ^= Bx[r]);

#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
	/* 8 rounds = 4 double-rounds of column then row quarter-rounds. */
	for (r = 0; r < 8; r += 2) {
		/* Operate on columns. */
		x[ 4] ^= R(x[ 0]+x[12], 7);  x[ 9] ^= R(x[ 5]+x[ 1], 7);
		x[14] ^= R(x[10]+x[ 6], 7);  x[ 3] ^= R(x[15]+x[11], 7);
		x[ 8] ^= R(x[ 4]+x[ 0], 9);  x[13] ^= R(x[ 9]+x[ 5], 9);
		x[ 2] ^= R(x[14]+x[10], 9);  x[ 7] ^= R(x[ 3]+x[15], 9);
		x[12] ^= R(x[ 8]+x[ 4],13);  x[ 1] ^= R(x[13]+x[ 9],13);
		x[ 6] ^= R(x[ 2]+x[14],13);  x[11] ^= R(x[ 7]+x[ 3],13);
		x[ 0] ^= R(x[12]+x[ 8],18);  x[ 5] ^= R(x[ 1]+x[13],18);
		x[10] ^= R(x[ 6]+x[ 2],18);  x[15] ^= R(x[11]+x[ 7],18);
		/* Operate on rows. */
		x[ 1] ^= R(x[ 0]+x[ 3], 7);  x[ 6] ^= R(x[ 5]+x[ 4], 7);
		x[11] ^= R(x[10]+x[ 9], 7);  x[12] ^= R(x[15]+x[14], 7);
		x[ 2] ^= R(x[ 1]+x[ 0], 9);  x[ 7] ^= R(x[ 6]+x[ 5], 9);
		x[ 8] ^= R(x[11]+x[10], 9);  x[13] ^= R(x[12]+x[15], 9);
		x[ 3] ^= R(x[ 2]+x[ 1],13);  x[ 4] ^= R(x[ 7]+x[ 6],13);
		x[ 9] ^= R(x[ 8]+x[11],13);  x[14] ^= R(x[13]+x[12],13);
		x[ 0] ^= R(x[ 3]+x[ 2],18);  x[ 5] ^= R(x[ 4]+x[ 7],18);
		x[10] ^= R(x[ 9]+x[ 8],18);  x[15] ^= R(x[14]+x[13],18);
	}
#undef R

	for (r = 0; r < 16; r++)
		B[r] += x[r];
}
/* Generic scrypt ROMix with N = 1024, r = 1 (portable fallback path).
 * X is the 128-byte working block (two 64-byte halves), V is the
 * 1024 * 128-byte scratchpad.  Phase 1 fills V; phase 2 mixes X with
 * pseudo-random scratchpad rows selected by X[16] (the first word of
 * the second half) masked to the row count. */
static inline void scrypt_core(uint32_t *X, uint32_t *V)
{
uint32_t i, j, k;
for (i = 0; i < 1024; i++) {
memcpy(&V[i * 32], X, 128);
xor_salsa8(&X[0], &X[16]);
xor_salsa8(&X[16], &X[0]);
}
for (i = 0; i < 1024; i++) {
/* Data-dependent row index: 32 words per 128-byte row. */
j = 32 * (X[16] & 1023);
for (k = 0; k < 32; k++)
X[k] ^= V[j + k];
xor_salsa8(&X[0], &X[16]);
xor_salsa8(&X[16], &X[0]);
}
}
#endif
#ifndef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 1
#define scrypt_best_throughput() 1
#endif

/* Scratchpad size: one 128 KiB V array per parallel way, plus 63 bytes of
 * slack so users can realign the pointer to a 64-byte boundary. */
#define SCRYPT_BUFFER_SIZE (SCRYPT_MAX_WAYS * 131072 + 63)

/* Allocate the per-thread scrypt scratchpad.  Returns NULL on allocation
 * failure; the caller owns the buffer and must check and free it. */
unsigned char *scrypt_buffer_alloc(void)
{
	return malloc(SCRYPT_BUFFER_SIZE);
}
/* Full single-way scrypt(1024, 1, 1) of one 80-byte header.
 * midstate is the SHA-256 state after the header's first 64 bytes;
 * scratchpad is realigned to 64 bytes before use as the ROMix V array. */
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
uint32_t *midstate, unsigned char *scratchpad)
{
uint32_t tstate[8], ostate[8];
uint32_t X[32];
uint32_t *V;
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
memcpy(tstate, midstate, 32);
HMAC_SHA256_80_init(input, tstate, ostate);
PBKDF2_SHA256_80_128(tstate, ostate, input, X);
scrypt_core(X, V);
PBKDF2_SHA256_128_32(tstate, ostate, X, output);
}
#ifdef HAVE_SHA256_4WAY
/* 4-way scrypt: the SHA-256 stages run 4 lanes word-interleaved, while
 * the memory-hard scrypt_core runs serially per lane (it is not
 * vectorized on this path).  input holds 4 consecutive 80-byte headers;
 * output receives 4 consecutive 32-byte hashes. */
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
uint32_t tstate[4 * 8] __attribute__((aligned(128)));
uint32_t ostate[4 * 8] __attribute__((aligned(128)));
uint32_t W[4 * 32] __attribute__((aligned(128)));
uint32_t X[4 * 32] __attribute__((aligned(128)));
uint32_t *V;
int i, k;
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
/* Interleave the 4 headers word-by-word for the SIMD SHA-256. */
for (i = 0; i < 20; i++)
for (k = 0; k < 4; k++)
W[4 * i + k] = input[k * 20 + i];
for (i = 0; i < 8; i++)
for (k = 0; k < 4; k++)
tstate[4 * i + k] = midstate[i];
HMAC_SHA256_80_init_4way(W, tstate, ostate);
PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);
/* De-interleave into per-lane 128-byte blocks for scrypt_core. */
for (i = 0; i < 32; i++)
for (k = 0; k < 4; k++)
X[k * 32 + i] = W[4 * i + k];
scrypt_core(X + 0 * 32, V);
scrypt_core(X + 1 * 32, V);
scrypt_core(X + 2 * 32, V);
scrypt_core(X + 3 * 32, V);
/* Re-interleave for the final 4-way PBKDF2 stage. */
for (i = 0; i < 32; i++)
for (k = 0; k < 4; k++)
W[4 * i + k] = X[k * 32 + i];
PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);
for (i = 0; i < 8; i++)
for (k = 0; k < 4; k++)
output[k * 8 + i] = W[4 * i + k];
}
#endif /* HAVE_SHA256_4WAY */
#ifdef HAVE_SCRYPT_3WAY
/* 3-way scrypt: scalar SHA-256 stages per lane, with the memory-hard
 * loop handled by the 3-way assembly scrypt_core_3way.  input holds 3
 * consecutive 80-byte headers; output receives 3 x 32 bytes. */
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
uint32_t tstate[3 * 8], ostate[3 * 8];
uint32_t X[3 * 32] __attribute__((aligned(64)));
uint32_t *V;
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
memcpy(tstate + 0, midstate, 32);
memcpy(tstate + 8, midstate, 32);
memcpy(tstate + 16, midstate, 32);
HMAC_SHA256_80_init(input + 0, tstate + 0, ostate + 0);
HMAC_SHA256_80_init(input + 20, tstate + 8, ostate + 8);
HMAC_SHA256_80_init(input + 40, tstate + 16, ostate + 16);
PBKDF2_SHA256_80_128(tstate + 0, ostate + 0, input + 0, X + 0);
PBKDF2_SHA256_80_128(tstate + 8, ostate + 8, input + 20, X + 32);
PBKDF2_SHA256_80_128(tstate + 16, ostate + 16, input + 40, X + 64);
scrypt_core_3way(X, V);
PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
PBKDF2_SHA256_128_32(tstate + 8, ostate + 8, X + 32, output + 8);
PBKDF2_SHA256_128_32(tstate + 16, ostate + 16, X + 64, output + 16);
}
#ifdef HAVE_SHA256_4WAY
/* 12-way scrypt = 3 groups of 4-way SHA-256 stages plus 4 calls to the
 * 3-way scrypt core.  input holds 12 consecutive 80-byte headers. */
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
uint32_t tstate[12 * 8] __attribute__((aligned(128)));
uint32_t ostate[12 * 8] __attribute__((aligned(128)));
uint32_t W[12 * 32] __attribute__((aligned(128)));
uint32_t X[12 * 32] __attribute__((aligned(128)));
uint32_t *V;
int i, j, k;
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
/* Interleave 4 headers per group of 128 words. */
for (j = 0; j < 3; j++)
for (i = 0; i < 20; i++)
for (k = 0; k < 4; k++)
W[128 * j + 4 * i + k] = input[80 * j + k * 20 + i];
for (j = 0; j < 3; j++)
for (i = 0; i < 8; i++)
for (k = 0; k < 4; k++)
tstate[32 * j + 4 * i + k] = midstate[i];
HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
HMAC_SHA256_80_init_4way(W + 128, tstate + 32, ostate + 32);
HMAC_SHA256_80_init_4way(W + 256, tstate + 64, ostate + 64);
PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
PBKDF2_SHA256_80_128_4way(tstate + 32, ostate + 32, W + 128, W + 128);
PBKDF2_SHA256_80_128_4way(tstate + 64, ostate + 64, W + 256, W + 256);
/* De-interleave to 12 contiguous 128-byte lanes (3 lanes per core call). */
for (j = 0; j < 3; j++)
for (i = 0; i < 32; i++)
for (k = 0; k < 4; k++)
X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
scrypt_core_3way(X + 0 * 96, V);
scrypt_core_3way(X + 1 * 96, V);
scrypt_core_3way(X + 2 * 96, V);
scrypt_core_3way(X + 3 * 96, V);
for (j = 0; j < 3; j++)
for (i = 0; i < 32; i++)
for (k = 0; k < 4; k++)
W[128 * j + 4 * i + k] = X[128 * j + k * 32 + i];
PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
PBKDF2_SHA256_128_32_4way(tstate + 32, ostate + 32, W + 128, W + 128);
PBKDF2_SHA256_128_32_4way(tstate + 64, ostate + 64, W + 256, W + 256);
for (j = 0; j < 3; j++)
for (i = 0; i < 8; i++)
for (k = 0; k < 4; k++)
output[32 * j + k * 8 + i] = W[128 * j + 4 * i + k];
}
#endif /* HAVE_SHA256_4WAY */
#endif /* HAVE_SCRYPT_3WAY */
#ifdef HAVE_SCRYPT_6WAY
/* 24-way scrypt (AVX2 path) = 3 groups of 8-way SHA-256 stages plus 4
 * calls to the 6-way scrypt core.  input holds 24 consecutive headers. */
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
uint32_t tstate[24 * 8] __attribute__((aligned(128)));
uint32_t ostate[24 * 8] __attribute__((aligned(128)));
uint32_t W[24 * 32] __attribute__((aligned(128)));
uint32_t X[24 * 32] __attribute__((aligned(128)));
uint32_t *V;
int i, j, k;
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
/* Interleave 8 headers per group of 256 words. */
for (j = 0; j < 3; j++)
for (i = 0; i < 20; i++)
for (k = 0; k < 8; k++)
W[8 * 32 * j + 8 * i + k] = input[8 * 20 * j + k * 20 + i];
for (j = 0; j < 3; j++)
for (i = 0; i < 8; i++)
for (k = 0; k < 8; k++)
tstate[8 * 8 * j + 8 * i + k] = midstate[i];
HMAC_SHA256_80_init_8way(W + 0, tstate + 0, ostate + 0);
HMAC_SHA256_80_init_8way(W + 256, tstate + 64, ostate + 64);
HMAC_SHA256_80_init_8way(W + 512, tstate + 128, ostate + 128);
PBKDF2_SHA256_80_128_8way(tstate + 0, ostate + 0, W + 0, W + 0);
PBKDF2_SHA256_80_128_8way(tstate + 64, ostate + 64, W + 256, W + 256);
PBKDF2_SHA256_80_128_8way(tstate + 128, ostate + 128, W + 512, W + 512);
/* De-interleave to 24 contiguous lanes (6 lanes per core call). */
for (j = 0; j < 3; j++)
for (i = 0; i < 32; i++)
for (k = 0; k < 8; k++)
X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];
scrypt_core_6way(X + 0 * 32, V);
scrypt_core_6way(X + 6 * 32, V);
scrypt_core_6way(X + 12 * 32, V);
scrypt_core_6way(X + 18 * 32, V);
for (j = 0; j < 3; j++)
for (i = 0; i < 32; i++)
for (k = 0; k < 8; k++)
W[8 * 32 * j + 8 * i + k] = X[8 * 32 * j + k * 32 + i];
PBKDF2_SHA256_128_32_8way(tstate + 0, ostate + 0, W + 0, W + 0);
PBKDF2_SHA256_128_32_8way(tstate + 64, ostate + 64, W + 256, W + 256);
PBKDF2_SHA256_128_32_8way(tstate + 128, ostate + 128, W + 512, W + 512);
for (j = 0; j < 3; j++)
for (i = 0; i < 8; i++)
for (k = 0; k < 8; k++)
output[8 * 8 * j + k * 8 + i] = W[8 * 32 * j + 8 * i + k];
}
#endif /* HAVE_SCRYPT_6WAY */
/* Scan nonces for scrypt: replicates the 80-byte header across the chosen
 * throughput (1/3/4/12/24 ways), hashes batches with increasing nonces in
 * word 19, and returns 1 with pdata[19] set to the winning nonce when a
 * hash meets the target.  hashes_done is updated either way. */
int scanhash_scrypt(int thr_id, uint32_t *pdata,
unsigned char *scratchbuf, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
{
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
uint32_t midstate[8];
uint32_t n = pdata[19] - 1;
const uint32_t Htarg = ptarget[7];
int throughput = scrypt_best_throughput();
int i;
#ifdef HAVE_SHA256_4WAY
if (sha256_use_4way())
throughput *= 4;
#endif
for (i = 0; i < throughput; i++)
memcpy(data + i * 20, pdata, 80);
/* Midstate over the first 64 header bytes is nonce-independent. */
sha256_init(midstate);
sha256_transform(midstate, data, 0);
do {
for (i = 0; i < throughput; i++)
data[i * 20 + 19] = ++n;
/* Dispatch on throughput; the #ifdef'ed branches fall through to
 * the scalar implementation when no SIMD variant matches. */
#if defined(HAVE_SHA256_4WAY)
if (throughput == 4)
scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf);
else
#endif
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
if (throughput == 12)
scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf);
else
#endif
#if defined(HAVE_SCRYPT_6WAY)
if (throughput == 24)
scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf);
else
#endif
#if defined(HAVE_SCRYPT_3WAY)
if (throughput == 3)
scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf);
else
#endif
scrypt_1024_1_1_256(data, hash, midstate, scratchbuf);
/* Quick reject on the top word before the full target comparison. */
for (i = 0; i < throughput; i++) {
if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) {
*hashes_done = n - pdata[19] + 1;
pdata[19] = data[i * 20 + 19];
return 1;
}
}
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - pdata[19] + 1;
pdata[19] = n;
return 0;
}
/* Register scrypt's scanhash/hash entry points with the algo gate.
 * Always succeeds. */
bool register_scrypt_algo( algo_gate_t* gate )
{
  gate->scanhash = &scanhash_scrypt;
  gate->hash = &scrypt_hash;
  /* get_max64 override intentionally disabled; default gate value is used. */
//  gate->get_max64 = scrypt_get_max64;
  return true;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,630 +0,0 @@
/*
* Copyright 2011 ArtForz
* Copyright 2011-2013 pooler
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include "../cpuminer-config.h"
#include "../miner.h"
#include <string.h>
#include <stdint.h>
#if defined(__arm__) && defined(__APCS_32__)
#define EXTERN_SHA256
#endif
/* SHA-256 initial hash values H0..H7 (FIPS 180-4, section 5.3.3). */
static const uint32_t sha256_h[8] = {
	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
	0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};

/* SHA-256 round constants K0..K63 (FIPS 180-4, section 4.2.2). */
static const uint32_t sha256_k[64] = {
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

/* Reset state[0..7] to the SHA-256 initial hash values. */
void sha256_init(uint32_t *state)
{
	int i;

	for (i = 0; i < 8; i++)
		state[i] = sha256_h[i];
}
/* Elementary functions used by SHA256 (FIPS 180-4 section 4.1.2).
 * NOTE(review): macro arguments are not parenthesized, so only pass
 * simple expressions (plain variables / array elements). */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3))
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ (x >> 10))
/* SHA256 round function; expects uint32_t locals t0, t1 in scope. */
#define RND(a, b, c, d, e, f, g, h, k) \
do { \
t0 = h + S1(e) + Ch(e, f, g) + k; \
t1 = S0(a) + Maj(a, b, c); \
d += t0; \
h = t0 + t1; \
} while (0)
/* Adjusted round function for rotating state: round i picks the eight
 * working variables out of S[0..7] by index instead of shuffling them. */
#define RNDr(S, W, i) \
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
S[(66 - i) % 8], S[(67 - i) % 8], \
S[(68 - i) % 8], S[(69 - i) % 8], \
S[(70 - i) % 8], S[(71 - i) % 8], \
W[i] + sha256_k[i])
#ifndef EXTERN_SHA256
/*
 * SHA-256 block compression function: folds one 512-bit block into the
 * 256-bit state.  If swap is non-zero the block words are byte-swapped
 * (big-endian input) first.  Behaviourally identical to the unrolled
 * original; the 64 rounds are expressed as a loop over RNDr, which
 * indexes the working variables by round number.
 */
void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
{
	uint32_t W[64];
	uint32_t S[8];
	uint32_t t0, t1;
	int i;

	/* 1. Prepare the message schedule W. */
	if (swap) {
		for (i = 0; i < 16; i++)
			W[i] = swab32(block[i]);
	} else
		memcpy(W, block, 64);
	for (i = 16; i < 64; i += 2) {
		W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
		W[i + 1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
	}

	/* 2. Initialize working variables from the current state. */
	memcpy(S, state, 32);

	/* 3. Mix: 64 rounds. */
	for (i = 0; i < 64; i++)
		RNDr(S, W, i);

	/* 4. Add the working variables back into the global state. */
	for (i = 0; i < 8; i++)
		state[i] += S[i];
}
#endif /* EXTERN_SHA256 */
/* Template block for the second hash of sha256d: words 8-15 are the fixed
 * SHA-256 padding of a 32-byte message (0x80 bit, zeros, length 0x100
 * bits); words 0-7 are placeholders that get overwritten with the first
 * hash's digest before transforming. */
static const uint32_t sha256d_hash1[16] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x80000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000100
};
/* Double SHA-256 of an 80-byte header already laid out as 20 big-endian
 * words (two 64-byte padded blocks assumed prepared by the caller's
 * buffer layout), with the result byte-swapped for target comparison. */
static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
{
uint32_t S[16];
int i;
sha256_init(S);
sha256_transform(S, data, 0);
sha256_transform(S, data + 16, 0);
/* Append the fixed padding for the 32-byte second message. */
memcpy(S + 8, sha256d_hash1 + 8, 32);
sha256_init(hash);
sha256_transform(hash, S, 0);
for (i = 0; i < 8; i++)
hash[i] = swab32(hash[i]);
}
/* General double SHA-256 over an arbitrary byte buffer.
 * NOTE(review): the bit length is stored in a single 32-bit word
 * (T[15] = 8 * len), so len is assumed well below 512 MiB. */
void sha256d(unsigned char *hash, const unsigned char *data, int len)
{
uint32_t S[16], T[16];
int i, r;
sha256_init(S);
/* r counts remaining bytes; looping while r > -9 guarantees one extra
 * block when the 0x80 byte and 8-byte length do not fit after the data. */
for (r = len; r > -9; r -= 64) {
if (r < 64)
memset(T, 0, 64);
memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
if (r >= 0 && r < 64)
((unsigned char *)T)[r] = 0x80;
for (i = 0; i < 16; i++)
T[i] = be32dec(T + i);
/* Length field goes in the last word of the final block only. */
if (r < 56)
T[15] = 8 * len;
sha256_transform(S, T, 0);
}
/* Second hash: 32-byte digest plus fixed padding. */
memcpy(S + 8, sha256d_hash1 + 8, 32);
sha256_init(T);
sha256_transform(T, S, 0);
for (i = 0; i < 8; i++)
be32enc((uint32_t *)hash + i, T[i]);
}
/* Partially pre-extends the message schedule for the second 64-byte block
 * of an 80-byte header.  Every term that depends on W[3] (the nonce) is
 * deliberately omitted here and added per-nonce by sha256d_ms() — e.g.
 * W[18] lacks s0(W[3]), W[19] lacks W[3], W[20..31] lack the s1()/W[i-7]
 * terms that transitively involve the nonce. */
static inline void sha256d_preextend(uint32_t *W)
{
W[16] = s1(W[14]) + W[ 9] + s0(W[ 1]) + W[ 0];
W[17] = s1(W[15]) + W[10] + s0(W[ 2]) + W[ 1];
W[18] = s1(W[16]) + W[11] + W[ 2];
W[19] = s1(W[17]) + W[12] + s0(W[ 4]);
W[20] = W[13] + s0(W[ 5]) + W[ 4];
W[21] = W[14] + s0(W[ 6]) + W[ 5];
W[22] = W[15] + s0(W[ 7]) + W[ 6];
W[23] = W[16] + s0(W[ 8]) + W[ 7];
W[24] = W[17] + s0(W[ 9]) + W[ 8];
W[25] = s0(W[10]) + W[ 9];
W[26] = s0(W[11]) + W[10];
W[27] = s0(W[12]) + W[11];
W[28] = s0(W[13]) + W[12];
W[29] = s0(W[14]) + W[13];
W[30] = s0(W[15]) + W[14];
W[31] = s0(W[16]) + W[15];
}
/* Advances the midstate S through rounds 0-2 of the second block; these
 * rounds use only W[0..2] and so are independent of the nonce in W[3].
 * sha256d_ms() resumes from round 3. */
static inline void sha256d_prehash(uint32_t *S, const uint32_t *W)
{
uint32_t t0, t1;
RNDr(S, W, 0);
RNDr(S, W, 1);
RNDr(S, W, 2);
}
#ifdef EXTERN_SHA256
void sha256d_ms(uint32_t *hash, uint32_t *W,
const uint32_t *midstate, const uint32_t *prehash);
#else
/* Specialized double SHA-256 for 80-byte headers where only the nonce
 * (word W[3] of the second block) changes between calls.  midstate is the
 * state after the first block, prehash is midstate advanced through the
 * nonce-independent rounds 0-2, and W holds the schedule pre-extended by
 * sha256d_preextend().  Nonce-independent schedule words are saved on
 * entry and restored before returning so W can be reused for the next
 * nonce.  Behaviourally identical to the fully unrolled original; the
 * straight runs of RNDr rounds are expressed as loops. */
static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
	const uint32_t *midstate, const uint32_t *prehash)
{
	uint32_t S[64];
	uint32_t t0, t1;
	int i;

	/* Save the pre-extended schedule words that get patched below. */
	S[18] = W[18];
	S[19] = W[19];
	S[20] = W[20];
	S[22] = W[22];
	S[23] = W[23];
	S[24] = W[24];
	S[30] = W[30];
	S[31] = W[31];

	/* Add the nonce-dependent terms omitted by sha256d_preextend(). */
	W[18] += s0(W[3]);
	W[19] += W[3];
	W[20] += s1(W[18]);
	W[21] = s1(W[19]);
	W[22] += s1(W[20]);
	W[23] += s1(W[21]);
	W[24] += s1(W[22]);
	W[25] = s1(W[23]) + W[18];
	W[26] = s1(W[24]) + W[19];
	W[27] = s1(W[25]) + W[20];
	W[28] = s1(W[26]) + W[21];
	W[29] = s1(W[27]) + W[22];
	W[30] += s1(W[28]) + W[23];
	W[31] += s1(W[29]) + W[24];
	for (i = 32; i < 64; i += 2) {
		W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
		W[i + 1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
	}

	/* First hash: resume from the 3-round prehash.  Rounds 3-63 only
	 * touch S[0..7], so the words saved in S[18..31] survive. */
	memcpy(S, prehash, 32);
	for (i = 3; i < 64; i++)
		RNDr(S, W, i);
	for (i = 0; i < 8; i++)
		S[i] += midstate[i];

	/* Restore the pristine pre-extended schedule for the next nonce. */
	W[18] = S[18];
	W[19] = S[19];
	W[20] = S[20];
	W[22] = S[22];
	W[23] = S[23];
	W[24] = S[24];
	W[30] = S[30];
	W[31] = S[31];

	/* Second hash: message is the digest in S[0..7] plus fixed padding;
	 * extend the schedule only as far as the final rounds need (S[60]). */
	memcpy(S + 8, sha256d_hash1 + 8, 32);
	S[16] = s1(sha256d_hash1[14]) + sha256d_hash1[ 9] + s0(S[ 1]) + S[ 0];
	S[17] = s1(sha256d_hash1[15]) + sha256d_hash1[10] + s0(S[ 2]) + S[ 1];
	S[18] = s1(S[16]) + sha256d_hash1[11] + s0(S[ 3]) + S[ 2];
	S[19] = s1(S[17]) + sha256d_hash1[12] + s0(S[ 4]) + S[ 3];
	S[20] = s1(S[18]) + sha256d_hash1[13] + s0(S[ 5]) + S[ 4];
	S[21] = s1(S[19]) + sha256d_hash1[14] + s0(S[ 6]) + S[ 5];
	S[22] = s1(S[20]) + sha256d_hash1[15] + s0(S[ 7]) + S[ 6];
	S[23] = s1(S[21]) + S[16] + s0(sha256d_hash1[ 8]) + S[ 7];
	S[24] = s1(S[22]) + S[17] + s0(sha256d_hash1[ 9]) + sha256d_hash1[ 8];
	S[25] = s1(S[23]) + S[18] + s0(sha256d_hash1[10]) + sha256d_hash1[ 9];
	S[26] = s1(S[24]) + S[19] + s0(sha256d_hash1[11]) + sha256d_hash1[10];
	S[27] = s1(S[25]) + S[20] + s0(sha256d_hash1[12]) + sha256d_hash1[11];
	S[28] = s1(S[26]) + S[21] + s0(sha256d_hash1[13]) + sha256d_hash1[12];
	S[29] = s1(S[27]) + S[22] + s0(sha256d_hash1[14]) + sha256d_hash1[13];
	S[30] = s1(S[28]) + S[23] + s0(sha256d_hash1[15]) + sha256d_hash1[14];
	S[31] = s1(S[29]) + S[24] + s0(S[16]) + sha256d_hash1[15];
	for (i = 32; i < 60; i += 2) {
		S[i] = s1(S[i - 2]) + S[i - 7] + s0(S[i - 15]) + S[i - 16];
		S[i + 1] = s1(S[i - 1]) + S[i - 6] + s0(S[i - 14]) + S[i - 15];
	}
	S[60] = s1(S[58]) + S[53] + s0(S[45]) + S[44];

	sha256_init(hash);
	for (i = 0; i <= 56; i++)
		RNDr(hash, S, i);

	/* Rounds 57-60 partially evaluated: only the terms that reach the
	 * published output words are computed.  hash[7] additionally folds
	 * in sha256_h[7], so callers can compare it against the target
	 * without a separate final addition. */
	hash[2] += hash[6] + S1(hash[3]) + Ch(hash[3], hash[4], hash[5])
	         + S[57] + sha256_k[57];
	hash[1] += hash[5] + S1(hash[2]) + Ch(hash[2], hash[3], hash[4])
	         + S[58] + sha256_k[58];
	hash[0] += hash[4] + S1(hash[1]) + Ch(hash[1], hash[2], hash[3])
	         + S[59] + sha256_k[59];
	hash[7] += hash[3] + S1(hash[0]) + Ch(hash[0], hash[1], hash[2])
	         + S[60] + sha256_k[60]
	         + sha256_h[7];
}
#endif /* EXTERN_SHA256 */
#ifdef HAVE_SHA256_4WAY
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
const uint32_t *midstate, const uint32_t *prehash);
/* 4-way sha256d nonce scan: broadcasts the pre-extended schedule,
 * midstate and prehash into interleaved lanes, then hashes 4 consecutive
 * nonces per sha256d_ms_4way call.  Returns 1 with pdata[19] set to the
 * winning nonce on success, 0 when exhausted or restarted. */
static inline int scanhash_sha256d_4way(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
{
uint32_t data[4 * 64] __attribute__((aligned(128)));
uint32_t hash[4 * 8] __attribute__((aligned(32)));
uint32_t midstate[4 * 8] __attribute__((aligned(32)));
uint32_t prehash[4 * 8] __attribute__((aligned(32)));
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
int i, j;
memcpy(data, pdata + 16, 64);
sha256d_preextend(data);
/* Broadcast in place, highest index first so sources aren't clobbered. */
for (i = 31; i >= 0; i--)
for (j = 0; j < 4; j++)
data[i * 4 + j] = data[i];
sha256_init(midstate);
sha256_transform(midstate, pdata, 0);
memcpy(prehash, midstate, 32);
sha256d_prehash(prehash, pdata + 16);
for (i = 7; i >= 0; i--) {
for (j = 0; j < 4; j++) {
midstate[i * 4 + j] = midstate[i];
prehash[i * 4 + j] = prehash[i];
}
}
do {
/* Nonce word is W[3] of the second block, one per lane. */
for (i = 0; i < 4; i++)
data[4 * 3 + i] = ++n;
sha256d_ms_4way(hash, data, midstate, prehash);
for (i = 0; i < 4; i++) {
/* Quick check on the top word; recompute fully before fulltest. */
if (swab32(hash[4 * 7 + i]) <= Htarg) {
pdata[19] = data[4 * 3 + i];
sha256d_80_swap(hash, pdata);
if (fulltest(hash, ptarget)) {
*hashes_done = n - first_nonce + 1;
return 1;
}
}
}
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
#endif /* HAVE_SHA256_4WAY */
#ifdef HAVE_SHA256_8WAY
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
const uint32_t *midstate, const uint32_t *prehash);
/* 8-way sha256d nonce scan; identical structure to the 4-way variant
 * with 8 interleaved lanes per sha256d_ms_8way call. */
static inline int scanhash_sha256d_8way(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
{
uint32_t data[8 * 64] __attribute__((aligned(128)));
uint32_t hash[8 * 8] __attribute__((aligned(32)));
uint32_t midstate[8 * 8] __attribute__((aligned(32)));
uint32_t prehash[8 * 8] __attribute__((aligned(32)));
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
int i, j;
memcpy(data, pdata + 16, 64);
sha256d_preextend(data);
/* Broadcast in place, highest index first so sources aren't clobbered. */
for (i = 31; i >= 0; i--)
for (j = 0; j < 8; j++)
data[i * 8 + j] = data[i];
sha256_init(midstate);
sha256_transform(midstate, pdata, 0);
memcpy(prehash, midstate, 32);
sha256d_prehash(prehash, pdata + 16);
for (i = 7; i >= 0; i--) {
for (j = 0; j < 8; j++) {
midstate[i * 8 + j] = midstate[i];
prehash[i * 8 + j] = prehash[i];
}
}
do {
for (i = 0; i < 8; i++)
data[8 * 3 + i] = ++n;
sha256d_ms_8way(hash, data, midstate, prehash);
for (i = 0; i < 8; i++) {
if (swab32(hash[8 * 7 + i]) <= Htarg) {
pdata[19] = data[8 * 3 + i];
sha256d_80_swap(hash, pdata);
if (fulltest(hash, ptarget)) {
*hashes_done = n - first_nonce + 1;
return 1;
}
}
}
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
#endif /* HAVE_SHA256_8WAY */
/* Scalar sha256d nonce scan entry point; dispatches to the 8-way or
 * 4-way SIMD scanners when available, otherwise iterates one nonce at a
 * time using the pre-extend/prehash optimized sha256d_ms(). */
int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
{
uint32_t data[64] __attribute__((aligned(128)));
uint32_t hash[8] __attribute__((aligned(32)));
uint32_t midstate[8] __attribute__((aligned(32)));
uint32_t prehash[8] __attribute__((aligned(32)));
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
#ifdef HAVE_SHA256_8WAY
if (sha256_use_8way())
return scanhash_sha256d_8way(thr_id, pdata, ptarget,
max_nonce, hashes_done);
#endif
#ifdef HAVE_SHA256_4WAY
if (sha256_use_4way())
return scanhash_sha256d_4way(thr_id, pdata, ptarget,
max_nonce, hashes_done);
#endif
memcpy(data, pdata + 16, 64);
sha256d_preextend(data);
sha256_init(midstate);
sha256_transform(midstate, pdata, 0);
memcpy(prehash, midstate, 32);
sha256d_prehash(prehash, pdata + 16);
do {
/* data[3] is the nonce word of the second block. */
data[3] = ++n;
sha256d_ms(hash, data, midstate, prehash);
/* Quick check on the top word; recompute fully before fulltest. */
if (swab32(hash[7]) <= Htarg) {
pdata[19] = data[3];
sha256d_80_swap(hash, pdata);
if (fulltest(hash, ptarget)) {
*hashes_done = n - first_nonce + 1;
return 1;
}
}
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}

View File

@@ -32,12 +32,10 @@
#include <string.h> #include <string.h>
#include <stdint.h> #include <stdint.h>
#include "algo/groestl/sph_groestl.h"
#include "algo/keccak/sph_keccak.h" #include "algo/keccak/sph_keccak.h"
#ifdef NO_AES_NI #ifndef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#include "algo/groestl/aes_ni/hash-groestl.h" #include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h" #include "algo/echo/aes_ni/hash_api.h"
#endif #endif
@@ -61,17 +59,21 @@
#define POK_DATA_MASK 0xFFFF0000 #define POK_DATA_MASK 0xFFFF0000
typedef struct { typedef struct {
#ifndef NO_AES_NI #ifdef NO_AES_NI
hashState_groestl groestl; sph_groestl512_context groestl;
#else
hashState_groestl groestl;
#endif #endif
sph_keccak512_context keccak; sph_keccak512_context keccak;
} zr5_ctx_holder; } zr5_ctx_holder;
zr5_ctx_holder zr5_ctx; zr5_ctx_holder zr5_ctx;
void init_zr5_ctx() void init_zr5_ctx()
{ {
#ifndef NO_AES_NI #ifdef NO_AES_NI
sph_groestl512_init( &zr5_ctx.groestl );
#else
init_groestl( &zr5_ctx.groestl ); init_groestl( &zr5_ctx.groestl );
#endif #endif
sph_keccak512_init(&zr5_ctx.keccak); sph_keccak512_init(&zr5_ctx.keccak);
@@ -88,10 +90,6 @@ DATA_ALIGN16(sph_u64 hashctB);
//memset(hash, 0, 128); //memset(hash, 0, 128);
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
static const int arrOrder[][4] = static const int arrOrder[][4] =
{ {
{ 0, 1, 2, 3 }, { 0, 1, 3, 2 }, { 0, 2, 1, 3 }, { 0, 2, 3, 1 }, { 0, 1, 2, 3 }, { 0, 1, 3, 2 }, { 0, 2, 1, 3 }, { 0, 2, 3, 1 },
@@ -123,9 +121,8 @@ static const int arrOrder[][4] =
break; break;
case 1: case 1:
#ifdef NO_AES_NI #ifdef NO_AES_NI
{GRS_I; sph_groestl512 (&ctx.groestl, hash, 64);
GRS_U; sph_groestl512_close(&ctx.groestl, hash);
GRS_C; }
#else #else
update_groestl( &ctx.groestl, (char*)hash,512); update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash); final_groestl( &ctx.groestl, (char*)hash);

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.4.12]) AC_INIT([cpuminer-opt], [3.5.0])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@@ -2031,7 +2031,7 @@ bool jr2_stratum_handle_response( json_t *val )
static bool stratum_handle_response( char *buf ) static bool stratum_handle_response( char *buf )
{ {
json_t *val, *res_val, *id_val; json_t *val, *id_val;
json_error_t err; json_error_t err;
bool ret = false; bool ret = false;
@@ -2041,7 +2041,7 @@ static bool stratum_handle_response( char *buf )
applog(LOG_INFO, "JSON decode failed(%d): %s", err.line, err.text); applog(LOG_INFO, "JSON decode failed(%d): %s", err.line, err.text);
goto out; goto out;
} }
res_val = json_object_get( val, "result" ); json_object_get( val, "result" );
id_val = json_object_get( val, "id" ); id_val = json_object_get( val, "id" );
if ( !id_val || json_is_null(id_val) ) if ( !id_val || json_is_null(id_val) )
goto out; goto out;
@@ -2477,9 +2477,9 @@ void parse_arg(int key, char *arg )
show_usage_and_exit(1); show_usage_and_exit(1);
} }
free(rpc_url); free(rpc_url);
rpc_url = (char*) malloc(strlen(hp) + 8); rpc_url = (char*) malloc( strlen(hp) + 15 );
sprintf(rpc_url, "http://%s", hp); sprintf( rpc_url, "stratum+tcp://%s", hp );
short_url = &rpc_url[sizeof("http://")-1]; short_url = &rpc_url[ sizeof("stratum+tcp://") - 1 ];
} }
have_stratum = !opt_benchmark && !strncasecmp(rpc_url, "stratum", 7); have_stratum = !opt_benchmark && !strncasecmp(rpc_url, "stratum", 7);
break; break;

View File

@@ -331,6 +331,7 @@ bool has_sse();
void cpu_bestcpu_feature( char *outbuf, size_t maxsz ); void cpu_bestcpu_feature( char *outbuf, size_t maxsz );
void cpu_getname(char *outbuf, size_t maxsz); void cpu_getname(char *outbuf, size_t maxsz);
void cpu_getmodelid(char *outbuf, size_t maxsz); void cpu_getmodelid(char *outbuf, size_t maxsz);
void cpu_brand_string( char* s );
float cpu_temp( int core ); float cpu_temp( int core );