Compare commits

..

35 Commits

Author SHA1 Message Date
Jay D Dee
d60a268972 v3.8.2 2018-02-15 14:48:50 -05:00
Jay D Dee
e4265a6f11 v3.8.1.1 2018-02-09 23:30:14 -05:00
Jay D Dee
a28daca3ce v3.8.1 2018-02-07 16:38:45 -05:00
Jay D Dee
54b8fd7362 v3.8.0.1 2018-02-05 22:10:18 -05:00
Jay D Dee
ad2275f74a v3.8.0 2018-01-23 21:02:16 -05:00
Jay D Dee
a90d75b8f5 v3.7.10 2018-01-16 15:11:44 -05:00
Jay D Dee
bee78eac76 v3.7.9 2018-01-08 22:04:43 -05:00
Jay D Dee
2d2e54f001 v3.7.8 2017-12-30 19:19:46 -05:00
Jay D Dee
79164c24b5 v3.7.7 2017-12-17 12:00:42 -05:00
Jay D Dee
7a1389998b v3.7.6 2017-12-14 18:28:51 -05:00
Jay D Dee
af1c940919 v3.7.5 2017-12-08 15:39:28 -05:00
Jay D Dee
4b57ac0eb9 v3.7.4 2017-11-28 16:32:04 -05:00
Jay D Dee
6d1361c87f v3.7.3 2017-11-20 21:19:15 -05:00
Jay D Dee
ab39e88318 v3.7.2 2017-11-01 11:03:23 -04:00
Jay D Dee
8ff52e7ad6 v3.7.1 2017-10-31 00:25:24 -04:00
Jay D Dee
aaa48599ad v3.7.0 2017-10-17 11:38:59 -04:00
Jay D Dee
c76574b2cd v3.6.11 2017-10-12 15:14:37 -04:00
Jay D Dee
989fb42d20 v3.6.10 2017-10-12 11:49:40 -04:00
Jay D Dee
710c852f05 v3.6.9 2017-10-09 21:45:27 -04:00
Jay D Dee
39f089d3dc v3.6.8 2017-07-31 20:02:45 -04:00
Jay D Dee
ec4f6028a2 v3.6.7 2017-07-24 21:38:32 -04:00
Jay D Dee
f8907677f6 v3.6.6 2017-07-01 14:37:11 -04:00
Jay D Dee
7544cb956c v3.6.5 2017-05-19 16:38:26 -04:00
Jay D Dee
e7dbd27636 v3.6.4 2017-05-02 10:28:19 -04:00
Jay D Dee
0155cd1bfe v3.6.3 2017-04-17 14:35:18 -04:00
Jay D Dee
53259692eb v3.6.2 2017-04-16 12:55:19 -04:00
Jay D Dee
9afa0d9820 v3.6.1 2017-03-31 11:59:04 -04:00
Jay D Dee
2dcc4821b6 v3.6.0 2017-03-15 13:21:18 -04:00
Jay D Dee
200b06f369 I have no idea why I need to do this except git told me to. I hope
it doesn't break anything, I just want to update the master branch.
2017-03-10 11:52:49 -05:00
Jay D Dee
f1f9e821a2 v3.5.13 2017-03-10 11:38:58 -05:00
JayDDee
6431c25293 Merge pull request #6 from felixbrucker/felixbrucker/update-dockerfile
Update Dockerfile for cpuminer-opt
2017-03-04 11:29:25 -05:00
Felix Brucker
e1742be8c4 Update Dockerfile for cpuminer-opt 2017-03-04 11:31:19 +01:00
Jay D Dee
38c6f23b66 v3.5.12 2017-03-03 13:55:00 -05:00
Jay D Dee
1b288b1209 v3.5.11 2017-02-28 17:15:39 -05:00
Jay D Dee
f7865ae9f9 v3.5.10 2017-02-26 13:37:00 -05:00
365 changed files with 42743 additions and 17394 deletions

1
.gitignore vendored
View File

@@ -11,7 +11,6 @@ autom4te.cache
Makefile
Makefile.in
INSTALL
configure
configure.lineno
depcomp
missing

12
AUTHORS
View File

@@ -16,4 +16,16 @@ LucasJones
tpruvot@github
elmad
djm34
palmd
ig0tik3d
Wolf0
Optiminer
Jay D Dee

View File

@@ -5,19 +5,31 @@
# ex: docker run -it --rm cpuminer-opt:latest -a cryptonight -o cryptonight.eu.nicehash.com:3355 -u 1MiningDW2GKzf4VQfmp4q2XoUvR6iy6PD.worker1 -p x -t 3
#
FROM ubuntu:16.04
RUN BUILD_DEPS="build-essential \
libssl-dev \
libgmp-dev \
libcurl4-openssl-dev \
libjansson-dev \
automake" && \
# Build
FROM ubuntu:16.04 as builder
apt-get update && \
apt-get install -y ${BUILD_DEPS}
RUN apt-get update \
&& apt-get install -y \
build-essential \
libssl-dev \
libgmp-dev \
libcurl4-openssl-dev \
libjansson-dev \
automake \
&& rm -rf /var/lib/apt/lists/*
COPY . /app/
RUN cd /app/ && ./build.sh
RUN cd /app/ && ./build.sh
ENTRYPOINT ["/app/cpuminer"]
# App
FROM ubuntu:16.04
RUN apt-get update \
&& apt-get install -y \
libcurl3 \
libjansson4 \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /app/cpuminer .
ENTRYPOINT ["./cpuminer"]
CMD ["-h"]

View File

@@ -22,30 +22,6 @@ cpuminer_SOURCES = \
api.c \
sysinfos.c \
algo-gate-api.c\
algo/groestl/sph_groestl.c \
algo/skein/sph_skein.c \
algo/bmw/sph_bmw.c \
algo/shavite/sph_shavite.c \
algo/shavite/shavite.c \
algo/echo/sph_echo.c \
algo/blake/sph_blake.c \
algo/blake/sph_blake2b.c \
algo/heavy/sph_hefty1.c \
algo/blake/mod_blakecoin.c \
algo/luffa/sph_luffa.c \
algo/cubehash/sph_cubehash.c \
algo/simd/sph_simd.c \
algo/hamsi/sph_hamsi.c \
algo/fugue/sph_fugue.c \
algo/gost/sph_gost.c \
algo/jh/sph_jh.c \
algo/keccak/sph_keccak.c \
algo/keccak/keccak.c\
algo/sha3/sph_sha2.c \
algo/sha3/sph_sha2big.c \
algo/shabal/sph_shabal.c \
algo/whirlpool/sph_whirlpool.c\
crypto/blake2s.c \
crypto/oaes_lib.c \
crypto/c_keccak.c \
crypto/c_groestl.c \
@@ -61,92 +37,209 @@ cpuminer_SOURCES = \
algo/argon2/ar2/cores.c \
algo/argon2/ar2/ar2-scrypt-jane.c \
algo/argon2/ar2/blake2b.c \
algo/axiom.c \
algo/blake/sph_blake.c \
algo/blake/blake-hash-4way.c \
algo/blake/blake-gate.c \
algo/blake/blake.c \
algo/blake/blake-4way.c \
algo/blake/sph_blake2b.c \
algo/blake/blake2b.c \
algo/blake/sph-blake2s.c \
algo/blake/blake2s.c \
algo/blake/blakecoin-gate.c \
algo/blake/mod_blakecoin.c \
algo/blake/blakecoin.c \
algo/blake/blakecoin-4way.c \
algo/blake/decred-gate.c \
algo/blake/decred.c \
algo/blake/decred-4way.c \
algo/blake/pentablake-gate.c \
algo/blake/pentablake-4way.c \
algo/blake/pentablake.c \
algo/bmw/sph_bmw.c \
algo/bmw/bmw-hash-4way.c \
algo/bmw/bmw256.c \
algo/cubehash/sse2/cubehash_sse2.c\
algo/cryptonight/cryptolight.c \
algo/cryptonight/cryptonight-common.c\
algo/cryptonight/cryptonight-aesni.c\
algo/cryptonight/cryptonight.c\
algo/drop.c \
algo/cubehash/sph_cubehash.c \
algo/cubehash/sse2/cubehash_sse2.c\
algo/echo/sph_echo.c \
algo/echo/aes_ni/hash.c\
algo/fresh.c \
algo/gost/sph_gost.c \
algo/groestl/sph_groestl.c \
algo/groestl/groestl.c \
algo/groestl/myrgr-gate.c \
algo/groestl/myrgr-4way.c \
algo/groestl/myr-groestl.c \
algo/groestl/aes_ni/hash-groestl.c \
algo/groestl/aes_ni/hash-groestl256.c \
algo/groestl/sse2/grso.c \
algo/groestl/sse2/grso-asm.c \
algo/fugue/sph_fugue.c \
algo/hamsi/sph_hamsi.c \
algo/hamsi/hamsi-hash-4way.c \
algo/haval/haval.c \
algo/haval/haval-hash-4way.c \
algo/heavy/sph_hefty1.c \
algo/heavy/heavy.c \
algo/heavy/bastion.c \
algo/hmq1725.c \
algo/hodl/hodl.cpp \
algo/hodl/hodl-gate.c \
algo/hodl/hodl_arith_uint256.cpp \
algo/hodl/hodl_uint256.cpp \
algo/hodl/hash.cpp \
algo/hodl/hmac_sha512.cpp \
algo/hodl/sha256.cpp \
algo/hodl/sha512.cpp \
algo/hodl/utilstrencodings.cpp \
algo/hodl/hodl-wolf.c \
algo/hodl/aes.c \
algo/hodl/hodl-gate.c \
algo/hodl/hodl-wolf.c \
algo/hodl/sha512_avx.c \
algo/hodl/sha512_avx2.c \
algo/lbry.c \
algo/jh/sph_jh.c \
algo/jh/jh-hash-4way.c \
algo/jh/jha-gate.c \
algo/jh/jha-4way.c \
algo/jh/jha.c \
algo/keccak/sph_keccak.c \
algo/keccak/keccak.c\
algo/keccak/keccak-hash-4way.c \
algo/keccak/keccak-4way.c\
algo/keccak/keccak-gate.c \
algo/keccak/sse2/keccak.c \
algo/luffa/sph_luffa.c \
algo/luffa/luffa.c \
algo/luffa/sse2/luffa_for_sse2.c \
algo/luffa/luffa_for_sse2.c \
algo/luffa/luffa-hash-2way.c \
algo/lyra2/lyra2.c \
algo/lyra2/sponge.c \
algo/lyra2/lyra2rev2-gate.c \
algo/lyra2/lyra2rev2.c \
algo/lyra2/lyra2rev2-4way.c \
algo/lyra2/lyra2re.c \
algo/lyra2/zcoin.c \
algo/lyra2/zoin.c \
algo/keccak/sse2/keccak.c \
algo/lyra2/lyra2z-gate.c \
algo/lyra2/lyra2z.c \
algo/lyra2/lyra2z-4way.c \
algo/lyra2/lyra2z330.c \
algo/lyra2/lyra2h-gate.c \
algo/lyra2/lyra2h.c \
algo/lyra2/lyra2h-4way.c \
algo/lyra2/allium-gate.c \
algo/lyra2/allium-4way.c \
algo/lyra2/allium.c \
algo/m7m.c \
algo/neoscrypt.c \
algo/nist5.c \
algo/neoscrypt/neoscrypt.c \
algo/nist5/nist5-gate.c \
algo/nist5/nist5-4way.c \
algo/nist5/nist5.c \
algo/nist5/zr5.c \
algo/pluck.c \
algo/quark/quark-gate.c \
algo/quark/quark.c \
algo/quark/quark-4way.c \
algo/quark/anime-gate.c \
algo/quark/anime.c \
algo/quark/anime-4way.c \
algo/qubit/qubit-gate.c \
algo/qubit/qubit.c \
algo/qubit/qubit-2way.c \
algo/qubit/deep-gate.c \
algo/qubit/deep-2way.c \
algo/qubit/deep.c \
algo/ripemd/sph_ripemd.c \
algo/ripemd/ripemd-hash-4way.c \
algo/ripemd/lbry-gate.c \
algo/ripemd/lbry.c \
algo/ripemd/lbry-4way.c \
algo/scrypt.c \
algo/scryptjane/scrypt-jane.c \
algo/sha2/sha2.c \
algo/sha2/sha256t.c \
algo/simd/sse2/nist.c \
algo/simd/sse2/vector.c \
algo/sha/sph_sha2.c \
algo/sha/sph_sha2big.c \
algo/sha/sha2-hash-4way.c \
algo/sha/sha2.c \
algo/sha/sha256t.c \
algo/shabal/sph_shabal.c \
algo/shabal/shabal-hash-4way.c \
algo/shavite/sph_shavite.c \
algo/shavite/sph-shavite-aesni.c \
algo/shavite/shavite.c \
algo/simd/sph_simd.c \
algo/simd/nist.c \
algo/simd/vector.c \
algo/simd/simd-hash-2way.c \
algo/skein/sph_skein.c \
algo/skein/skein-hash-4way.c \
algo/skein/skein.c \
algo/skein/skein-4way.c \
algo/skein/skein-gate.c \
algo/skein/skein2.c \
algo/s3.c \
algo/skein/skein2-4way.c \
algo/skein/skein2-gate.c \
algo/sm3/sm3.c \
algo/sm3/sm3-hash-4way.c \
algo/tiger/sph_tiger.c \
algo/timetravel.c \
algo/veltor.c \
algo/whirlpool/sph_whirlpool.c \
algo/whirlpool/whirlpool-hash-4way.c \
algo/whirlpool/whirlpool-gate.c \
algo/whirlpool/whirlpool-4way.c \
algo/whirlpool/whirlpool.c \
algo/whirlpool/whirlpoolx.c \
algo/x11/x11-gate.c \
algo/x11/x11.c \
algo/x11/x11evo.c \
algo/x11/x11-4way.c \
algo/x11/x11gost-gate.c \
algo/x11/x11gost.c \
algo/x11/x11gost-4way.c \
algo/x11/c11-gate.c \
algo/x11/c11.c \
algo/x11/c11-4way.c \
algo/x11/tribus-gate.c \
algo/x11/tribus.c \
algo/x11/tribus-4way.c \
algo/x11/timetravel-gate.c \
algo/x11/timetravel.c \
algo/x11/timetravel-4way.c \
algo/x11/timetravel10-gate.c \
algo/x11/timetravel10.c \
algo/x11/timetravel10-4way.c \
algo/x11/fresh.c \
algo/x11/x11evo.c \
algo/x11/x11evo-4way.c \
algo/x11/x11evo-gate.c \
algo/x12/x12-gate.c \
algo/x12/x12.c \
algo/x12/x12-4way.c \
algo/x13/x13-gate.c \
algo/x13/x13.c \
algo/x13/x13-4way.c \
algo/x13/x13sm3-gate.c \
algo/x13/x13sm3.c \
algo/x13/x13sm3-4way.c \
algo/x13/phi1612-gate.c \
algo/x13/phi1612.c \
algo/x13/phi1612-4way.c \
algo/x13/skunk-gate.c \
algo/x13/skunk-4way.c \
algo/x13/skunk.c \
algo/x13/drop.c \
algo/x14/x14-gate.c \
algo/x14/x14.c \
algo/x14/x14-4way.c \
algo/x14/veltor-gate.c \
algo/x14/veltor.c \
algo/x14/veltor-4way.c \
algo/x14/polytimos-gate.c \
algo/x14/polytimos.c \
algo/x14/polytimos-4way.c \
algo/x14/axiom.c \
algo/x15/x15-gate.c \
algo/x15/x15.c \
algo/x15/x15-4way.c \
algo/x17/x17-gate.c \
algo/x17/x17.c \
algo/xevan.c \
algo/x17/x17-4way.c \
algo/x17/xevan-gate.c \
algo/x17/xevan.c \
algo/x17/xevan-4way.c \
algo/x17/x16r-gate.c \
algo/x17/x16r.c \
algo/x17/x16r-4way.c \
algo/x17/hmq1725.c \
algo/yescrypt/yescrypt.c \
algo/yescrypt/yescrypt-common.c \
algo/yescrypt/sha256_Y.c\
algo/yescrypt/yescrypt-simd.c\
algo/zr5.c
algo/yescrypt/sha256_Y.c \
algo/yescrypt/yescrypt-simd.c
disable_flags =

158
README.md
View File

@@ -13,64 +13,6 @@ mailto://jayddee246@gmail.com
See file RELEASE_NOTES for change log and compile instructions.
Supported Algorithms
--------------------
argon2
axiom Shabal-256 MemoHash
bastion
blake Blake-256 (SFR)
blakecoin blake256r8
blake2s Blake-2 S
bmw BMW 256
c11 Flax
cryptolight Cryptonight-light
cryptonight cryptonote, Monero (XMR)
decred
drop Dropcoin
fresh Fresh
groestl groestl
heavy Heavy
hmq1725 Espers
hodl Hodlcoin
keccak Keccak
lbry LBC, LBRY Credits
luffa Luffa
lyra2re lyra2
lyra2rev2 lyrav2
lyra2z Zcoin (XZC)
lyra2zoin Zoin (ZOI)
m7m Magi (XMG)
myr-gr Myriad-Groestl
neoscrypt NeoScrypt(128, 2, 1)
nist5 Nist5
pluck Pluck:128 (Supcoin)
pentablake Pentablake
quark Quark
qubit Qubit
scrypt scrypt(1024, 1, 1) (default)
scrypt:N scrypt(N, 1, 1)
scryptjane:nf
sha256d SHA-256d
shavite3 Shavite3
skein Skein+Sha (Skeincoin)
skein2 Double Skein (Woodcoin)
timetravel Machinecoin (MAC)
vanilla blake256r8vnl (VCash)
veltor
whirlpool
whirlpoolx
x11 X11
x11evo Revolvercoin
x11gost sib (SibCoin)
x13 X13
x14 X14
x15 X15
x17
xevan Bitsend
yescrypt
zr5 Ziftr
Requirements
------------
@@ -83,17 +25,100 @@ algoritms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
performance.
ARM CPUs are not supported.
2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
Centos are known to work and have all dependencies in their repositories.
Others may work but may require more effort.
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
3. Stratum pool, cpuminer-opt only supports stratum minning. Some algos
may work wallet mining but there are no guarantees.
MacOS, OSx is not supported.
3. Stratum pool. Some algos may work wallet mining using getwork.
Supported Algorithms
--------------------
anime Animecoin
argon2
axiom Shabal-256 MemoHash
bastion
blake Blake-256 (SFR)
blakecoin blake256r8
blake2s Blake-2 S
bmw BMW 256
c11 Chaincoin
cryptolight Cryptonight-light
cryptonight cryptonote, Monero (XMR)
decred
deep Deepcoin (DCN)
dmd-gr Diamond-Groestl
drop Dropcoin
fresh Fresh
groestl Groestl coin
heavy Heavy
hmq1725 Espers
hodl Hodlcoin
jha Jackpotcoin
keccak Maxcoin
keccakc Creative coin
lbry LBC, LBRY Credits
luffa Luffa
lyra2h Hppcoin
lyra2re lyra2
lyra2rev2 lyra2v2, Vertcoin
lyra2z Zcoin (XZC)
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
m7m Magi (XMG)
myr-gr Myriad-Groestl
neoscrypt NeoScrypt(128, 2, 1)
nist5 Nist5
pentablake Pentablake
phi1612 phi, LUX coin
pluck Pluck:128 (Supcoin)
polytimos Ninja
quark Quark
qubit Qubit
scrypt scrypt(1024, 1, 1) (default)
scrypt:N scrypt(N, 1, 1)
scryptjane:nf
sha256d Double SHA-256
sha256t Triple SHA-256, Onecoin (OC)
shavite3 Shavite3
skein Skein+Sha (Skeincoin)
skein2 Double Skein (Woodcoin)
skunk Signatum (SIGT)
timetravel Machinecoin (MAC)
timetravel10 Bitcore
tribus Denarius (DNR)
vanilla blake256r8vnl (VCash)
veltor (VLT)
whirlpool
whirlpoolx
x11 Dash
x11evo Revolvercoin
x11gost sib (SibCoin)
x12 Galaxie Cash (GCH)
x13 X13
x13sm3 hsr (Hshare)
x14 X14
x15 X15
x16r Ravencoin
x17
xevan Bitsend
yescrypt Globalboost-Y (BSTY)
yescryptr8 BitZeny (ZNY)\n\
yescryptr16 Yenten (YTN)
zr5 Ziftr
Errata
------
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
these CPUs. Some algos may crash the miner with an invalid instruction.
Users are recommended to use an unoptimized miner such as cpuminer-multi.
cpuminer-opt does not work mining Decred algo at Nicehash and produces
only "invalid extranonce2 size" rejects.
@@ -107,13 +132,20 @@ forum at:
https://bitcointalk.org/index.php?topic=1326803.0
All problem reports must be accompanied by a proper definition.
This should include how the problem occurred, the command line and
output from the miner showing the startup and any errors.
Donations
---------
I do not do this for money but I have a donation address if users
are so inclined.
cpuminer-opt has no fees of any kind but donations are accepted.
bitcoin:12tdvfF7KmAsihBXQXynT6E6th2c2pByTT?label=donations
BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ
Happy mining!

View File

@@ -1,6 +1,9 @@
This file is included in the Windows binary package. Compile instructions
for Linux and Windows can be found in RELEASE_NOTES.
cpuminer is a console program that is executed from a DOS command prompt.
There is no GUI and no mouse support.
Choose the exe that best matches you CPU's features or use trial and
error to find the fastest one that doesn't crash. Pay attention to
the features listed at cpuminer startup to ensure you are mining at
@@ -8,15 +11,26 @@ optimum speed using all the available features.
Architecture names and compile options used are only provided for Intel
Core series. Pentium and Celeron often have fewer features.
AMD is YMMV, see previous paragraph.
Exe name Compile opts Arch name
cpuminer-sse2.exe -march=core2, Core2
cpuminer-sse42.exe -march=corei7, Nehalem
cpuminer-aes-sse42.exe -maes -msse4.2 Westmere
cpuminer-aes-avx.exe -march=corei7-avx, Sandybridge, Ivybridge
cpuminer-aes-avx2.exe -march=core-avx2, Haswell, Broadwell, Skylake, Kabylake
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
these CPUs. Some algos may crash the miner with an invalid instruction.
Users are recommended to use an unoptimized miner such as cpuminer-multi.
Exe name Compile flags Arch name
cpuminer-sse2.exe "-march=core2" Core2, Nehalem
cpuminer-aes-sse42.exe "-maes -msse4.2" Westmere
cpuminer-aes-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
cpuminer-avx2.exe "-march=core-avx2" Haswell...
cpuminer-avx2-sha.exe "-march=core-avx2 -msha" Ryzen
If you like this software feel free to donate:
BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ

View File

@@ -1,8 +1,368 @@
Compile instruction for Linux and Windows are at the bottom of this file.
cpuminer-opt now supports HW SHA acceleration available on AMD Ryzen CPUs.
This feature requires recent SW including GCC version 5 or higher and
openssl version 1.1 or higher. It may also require using "-march=znver1"
compile flag.
HW SHA support is only available when compiled from source, Windows binaries
are not yet available.
cpuminer-opt is a console program, if you're using a mouse you're doing it
wrong.
Security warning
----------------
Miner programs are often flagged as malware by antivirus programs. This is
a false positive, they are flagged simply because they are miners. The source
code is open for anyone to inspect. If you don't trust the software, don't use
it.
The cryptographic code has been taken from trusted sources but has been
modified for speed at the expense of accepted security practices. This
code should not be imported into applications where secure cryptography is
required.
Compile Instructions
--------------------
Requirements:
Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
supported.
64 bit Linux or Windows operating system. Apple is not supported.
Building on linux prerequisites:
It is assumed users know how to install packages on their system and
be able to compile standard source packages. This is basic Linux and
beyond the scope of cpuminer-opt.
Make sure you have the basic development packages installed.
Here is a good start:
http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu
Install any additional dependencies needed by cpuminer-opt. The list below
are some of the ones that may not be in the default install and need to
be installed manually. There may be others, read the error messages they
will give a clue as to the missing package.
The following command should install everything you need on Debian based
distributions such as Ubuntu:
sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev automake
build-essential (for Ubuntu, Development Tools package group on Fedora)
automake
libjansson-dev
libgmp-dev
libcurl4-openssl-dev
libssl-dev
pthreads
zlib
SHA support on AMD Ryzen CPUs requires gcc version 5 or higher and openssl 1.1
or higher. Reports of improved performiance on Ryzen when using openssl 1.0.2
have been due to AVX and AVX2 optimizations added to that version.
Additional improvements are expected on Ryzen with openssl 1.1.
"-march-znver1" or "-msha".
Additional instructions for static compilalation can be found here:
https://lxadm.com/Static_compilation_of_cpuminer
Static builds should only considered in a homogeneous HW and SW environment.
Local builds will always have the best performance and compatibility.
Extract cpuminer source.
tar xvzf cpuminer-opt-x.y.z.tar.gz
cd cpuminer-opt-x.y.z
Run ./build.sh to build on Linux or execute the following commands.
./autogen.sh
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
make
Additional optional compile flags, add the following to CFLAGS to activate:
-DUSE_SPH_SHA
SPH may give slightly better performance on algos that use sha256 when using
openssl 1.0.1 or older. Openssl 1.0.2 adds AVX2 and 1.1 adds SHA and perform
better than SPH.
Start mining.
./cpuminer -a algo -o url -u username -p password
Windows
Precompiled Windows binaries are built on a Linux host using Mingw
with a more recent compiler than the following Windows hosted procedure.
Building on Windows prerequisites:
msys
mingw_w64
Visual C++ redistributable 2008 X64
openssl
Install msys and mingw_w64, only needed once.
Unpack msys into C:\msys or your preferred directory.
Install mingw_w64 from win-builds.
Follow instructions, check "msys or cygwin" and "x86_64" and accept default
existing msys instalation.
Open a msys shell by double clicking on msys.bat.
Note that msys shell uses linux syntax for file specifications, "C:\" is
mounted at "/c/".
Add mingw bin directory to PATH variable
PATH="/c/msys/opt/windows_64/bin/:$PATH"
Instalation complete, compile cpuminer-opt.
Unpack cpuminer-opt source files using tar from msys shell, or using 7zip
or similar Windows program.
In msys shell cd to miner directory.
cd /c/path/to/cpuminer-opt
Run build.sh to build on Windows or execute the following commands.
./autogen.sh
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
make
Start mining
cpuminer.exe -a algo -o url -u user -p password
The following tips may be useful for older AMD CPUs.
AMD CPUs older than Steamroller, including Athlon x2 and Phenom II x4, are
not supported by cpuminer-opt due to an incompatible implementation of SSE2
on these CPUs. Some algos may crash the miner with an invalid instruction.
Users are recommended to use an unoptimized miner such as cpuminer-multi.
Some users with AMD CPUs without AES_NI have reported problems compiling
with build.sh or "-march=native". Problems have included compile errors
and poor performance. These users are recommended to compile manually
specifying "-march=btver1" on the configure command line.
Support for even older x86_64 without AES_NI or SSE2 is not availble.
Change Log
----------
v3.8.2
Fixed and faster myr-gr.
Added x12 algo (Galaxie Cash), allium algo (Garlicoin).
Faster lyra2rev2, lbry, skein.
Large reduction in compiler warnings.
v3.8.1.1
Fixed Windows AVX2 crash.
v3.8.1
Fixes x16r on CPUs with only SSE2.
More Optimizations for X algos, qubit & deep.
Corrected algo optimizations for scrypt and yescrypt, no new optimizations.
v3.8.0.1
Fixed x16r AVX2 low hash rate.
v3.8.0
4way no longer a seperate feature, included in AVX2.
Added x16r algo for Ravencoin, anime algo for Animecoin.
More 4way optimizations for X13 and up.
Tweaked CPU affinity to better support more than 64 CPUs.
Fixed compile problem on some old AMD CPUs.
v3.7.10
4way optimizations for lyra2rev2, lyra2h, quark, timetravel8, timetravel10
x11evo, blakecoin.
Faster x13sm3 (hsr).
Added share difficulty to accepted message.
v3.7.9
Partial 4way optimizations for veltor, skunk, polytimos, lyra2z.
Additional 4way optimizations for X algos.
New algo yescryptr8 for BitZeny, not to be confused with original
yescrypt Globalboost-Y.
v3.7.8
Partial 4way optimization for most X algos including c11, xevan, phi, hsr
v3.7.7
Fixed regression caused by 64 CPU support.
Fixed lyra2h.
v3.7.6
Added lyra2h algo for Hppcoin.
Added support for more than 64 CPUs.
Optimized shavite512 with AES, improves x11 etc.
v3.7.5
New algo keccakc for Creative coin with 4way optimizations
Rewrote some AVX/AVX2 code for more consistent implementation and some
optimizing.
Enhanced capabilities check to support 4way, more precise reporting of
features (not all algos use SSE2), and better error messages when using
an incompatible pre-built version (Windows users).
v3.7.4
Removed unnecessary build options.
Added 4way support for tribus and nist5.
v3.7.3
Added polytimos algo.
Introducing 4-way AVX2 optimization giving up to 4x performance inprovement
on many compute bound algos. First supported algos: skein, skein2, blake &
keccak. This feature is only available when compiled from source. See above
for instcuctions how to enable 4-way during compilation.
Updated Dockerfile.
v3.7.2
Fixed yescryptr16
Changed default sha256 and sha512 to openssl. This should be used when
compiling with openssl 1.0.2 or higher (Ubuntu 16.04).
This should increase the hashrate for yescrypt, yescryptr16, m7m, xevan, skein,
myr-gr & others when openssl 1.0.2 is installed.
Users with openssl 1.0.1 (Ubuntu 14.04) may get better perforance by adding
"-DUSE_SPH_SHA" to CLAGS.
Windows binaries are compiled with -DUSE_SPH_SHA and won't get the speedup.
v3.7.1
Added yescryptr16 algo for Yenten coin
Added SHA support to yescrypt and yescryptr16
Small code cleanup
v3.7.0
Fixed x14 misalignment bug.
Fixed decred stake version bug.
Getwork fixes for algos that use big endian data encoding: m7m, zr5, neoscrypt,
decred.
v3.6.10
Fixed misalignment bug in hsr.
v3.6.9
Added phi1612 algo for LUX coin
Added x13sm3 algo, alias hsr, for Hshare coin
v3.6.8
Fixed timetravel10 on Windows.
v3.6.7
Skunk algo added.
Tribus a little faster.
Minor restructuring.
v3.6.6
added tribus algo for Denarius (DNR)
configure removed from .gitignore. This should allow git clone to compile
on Windows/mingw.
Fixed CPU temperature monitoring on some CPUs (Linux only).
Fixed a compile error on FreeBSD (unsupported YMMV).
v3.6.5
Cryptonight a little faster.
Added jha algo (Jackpotcoin) with AES optimizations.
v3.6.4
Added support for Bitcore (BTX) using the timetravel10 algo, optimized for
AES and AVX2.
"-a bitcore" works as an alias and is less typing that "-a timetravel10".
v3.6.3
Fixed all known issues with SHA support on AMD Ryzen CPUs, still no
Windows binaries.
v3.6.2
SHA accceleration is now supported on AMD Ryzen CPUs when compiled from source,
Windows binaries not yet available.
Fixed groestl algo.
Fixed dmd-gr (Diamond) algo.
Fixed lbry compile error on Ryzen.
Added SHA support to m7m algo.
Hodl support for CPUs without AES has been removed, use legacy version.
v3.6.1
Fixed data alignment issue that broke lyra2rev2 AVX2 on Windows.
Added preliminary support for HW accelerated SHA.
Solo mining most algos should now work, cryptonight confirmed exception.
v3.6.0
Preliminary support for solo mining using getwork.
v3.5.13
Found more speed in Cubehash, algo improvement depends on chain length,
deep +8%, timetravel +1% , xevan +1%
Fixed a getwork bug, solo mining is not yet supported but testing is encouraged
v3.5.12
New algo sha256t for Onecoin (OC), 29% faster than ocminer version.
lyra2zoin algo renamed to lyra2z330, lyra2zoin and zoin still work
as aliases.
v3.5.11
Fixed hmq1725 crash on Ubuntu 16.04
Fixed compile error in hodl.cpp with gcc 6.3
Fixed x11 crash on Windows with AVX2
v3.5.10
Some AVX2 optimizations introduced for Luffa, shorter chained algos such
as Qubit and Deep should see the biggest gains, but many other algos should
also see improvement, longer chains like xevan not so much.
Rewrite of Groestl AES, now 100% vectorized, small improvement.
build.sh and winbuild.sh initialize with distclean instead of clean.
Implemented a workaround for a compile error in hodl code when compiling
with gcc 6.3.
V3.5.9
Reduced stack usage for hmq1725 and small speedup.
@@ -168,98 +528,3 @@ AVX2 optimizations improving many algos:
- x17 +2.8%
- qubit +8.4%
Compile Instructions
--------------------
Building on linux prerequisites:
It is assumed users know how to install packages on their system and
be able to compile standard source packages. This is basic Linux and
beyond the scope of cpuminer-opt.
Make sure you have the basic development packages installed.
Here is a good start:
http://askubuntu.com/questions/457526/how-to-install-cpuminer-in-ubuntu
Install any additional dependencies needed by cpuminer-opt. The list below
are some of the ones that may not be in the default install and need to
be installed manually. There may be others, read the error messages they
will give a clue as to the missing package.
The folliwing command should install everything you need on Debian based
packages:
sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev automake
Building on Linux, see below for Windows.
Dependencies
build-essential (for Ubuntu, Development Tools package group on Fedora)
automake
libjansson-dev
libgmp-dev
libcurl4-openssl-dev
libssl-dev
pthreads
zlib
tar xvzf [file.tar.gz]
cd [file]
Run build.sh to build on Linux or execute the following commands.
./autogen.sh
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
make
Start mining.
./cpuminer -a algo ...
Building on Windows prerequisites:
msys
mingw_w64
Visual C++ redistributable 2008 X64
openssl, not sure about this
Install msys and mingw_w64, only needed once.
Unpack msys into C:\msys or your preferred directory.
Install mingw__w64 from win-builds.
Follow instructions, check "msys or cygwin" and "x86_64" and accept default
existing msys instalation.
Open a msys shell by double clicking on msys.bat.
Note that msys shell uses linux syntax for file specifications, "C:\" is
mounted at "/c/".
Add mingw bin directory to PATH variable
PATH="/c/msys/opt/windows_64/bin/:$PATH"
Instalation complete, compile cpuminer-opt
Unpack cpuminer-opt source files using tar from msys shell, or using 7zip
or similar Windows program.
In msys shell cd to miner directory.
cd /c/path/to/cpuminer-opt
Run winbuild.sh to build on Windows or execute the following commands.
./autogen.sh
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
make
The following tips may be useful for older AMD CPUs.
Some users with AMD CPUs without AES_NI have reported problems compiling
with build.sh or "-march=native". Problems have included compile errors
and poor performance. These users are recommended to compile manually
specifying "-march=btver1" on the configure command line.
Support for even older x86_64 without AES_NI or SSE2 is not availble.

View File

@@ -16,7 +16,7 @@
#include <memory.h>
#include <unistd.h>
#include <openssl/sha.h>
#include "miner.h"
//#include "miner.h"
#include "algo-gate-api.h"
// Define null and standard functions.
@@ -77,6 +77,12 @@ void algo_not_tested()
applog(LOG_WARNING,"and bad things may happen. Use at your own risk.");
}
void four_way_not_tested()
{
applog( LOG_WARNING,"Algo %s has not been tested using 4way. It may not", algo_names[opt_algo] );
applog( LOG_WARNING,"work or may be slower. Please report your results.");
}
void algo_not_implemented()
{
applog(LOG_ERR,"Algo %s has not been Implemented.",algo_names[opt_algo]);
@@ -98,17 +104,12 @@ void null_hash_suw()
{
applog(LOG_WARNING,"SWERR: null_hash_suw unsafe null function");
};
void null_hash_alt()
{
applog(LOG_WARNING,"SWERR: null_hash_alt unsafe null function");
};
void init_algo_gate( algo_gate_t* gate )
{
gate->miner_thread_init = (void*)&return_true;
gate->scanhash = (void*)&null_scanhash;
gate->hash = (void*)&null_hash;
gate->hash_alt = (void*)&null_hash_alt;
gate->hash_suw = (void*)&null_hash_suw;
gate->get_new_work = (void*)&std_get_new_work;
gate->get_nonceptr = (void*)&std_get_nonceptr;
@@ -119,17 +120,17 @@ void init_algo_gate( algo_gate_t* gate )
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
gate->set_target = (void*)&std_set_target;
gate->submit_getwork_result = (void*)&std_submit_getwork_result;
gate->work_decode = (void*)&std_le_work_decode;
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
gate->build_extraheader = (void*)&std_build_extraheader;
gate->set_work_data_endian = (void*)&do_nothing;
gate->calc_network_diff = (void*)&std_calc_network_diff;
// gate->prevent_dupes = (void*)&return_false;
gate->ready_to_mine = (void*)&std_ready_to_mine;
gate->resync_threads = (void*)&do_nothing;
gate->do_this_thread = (void*)&return_true;
gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call;
gate->stratum_handle_response = (void*)&std_stratum_handle_response;
gate->optimizations = SSE2_OPT;
gate->optimizations = EMPTY_SET;
gate->ntime_index = STD_NTIME_INDEX;
gate->nbits_index = STD_NBITS_INDEX;
gate->nonce_index = STD_NONCE_INDEX;
@@ -137,6 +138,10 @@ void init_algo_gate( algo_gate_t* gate )
gate->work_cmp_size = STD_WORK_CMP_SIZE;
}
// Ignore warnings for not yet defined register functions
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wimplicit-function-declaration"
// called by each thread that uses the gate
bool register_algo_gate( int algo, algo_gate_t *gate )
{
@@ -150,71 +155,77 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
switch (algo)
{
// Ignore warnings for not yet defined register fucntions
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wimplicit-function-declaration"
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
case ALGO_BASTION: register_bastion_algo ( gate ); break;
case ALGO_BLAKE: register_blake_algo ( gate ); break;
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
case ALGO_C11: register_c11_algo ( gate ); break;
case ALGO_CRYPTOLIGHT: register_cryptolight_algo( gate ); break;
case ALGO_CRYPTONIGHT: register_cryptonight_algo( gate ); break;
case ALGO_DECRED: register_decred_algo ( gate ); break;
case ALGO_DEEP: register_deep_algo ( gate ); break;
case ALGO_DROP: register_drop_algo ( gate ); break;
case ALGO_FRESH: register_fresh_algo ( gate ); break;
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
case ALGO_HODL: register_hodl_algo ( gate ); break;
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
case ALGO_LBRY: register_lbry_algo ( gate ); break;
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
case ALGO_LYRA2Z: register_zcoin_algo ( gate ); break;
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
case ALGO_M7M: register_m7m_algo ( gate ); break;
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
case ALGO_NIST5: register_nist5_algo ( gate ); break;
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
case ALGO_QUARK: register_quark_algo ( gate ); break;
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
case ALGO_SKEIN: register_skein_algo ( gate ); break;
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
case ALGO_S3: register_s3_algo ( gate ); break;
case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break;
case ALGO_VANILLA: register_vanilla_algo ( gate ); break;
case ALGO_VELTOR: register_veltor_algo ( gate ); break;
case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break;
case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break;
case ALGO_X11: register_x11_algo ( gate ); break;
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
case ALGO_X11GOST: register_sib_algo ( gate ); break;
case ALGO_X13: register_x13_algo ( gate ); break;
case ALGO_X14: register_x14_algo ( gate ); break;
case ALGO_X15: register_x15_algo ( gate ); break;
case ALGO_X17: register_x17_algo ( gate ); break;
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
case ALGO_ZR5: register_zr5_algo ( gate ); break;
// restore warnings
#pragma GCC diagnostic pop
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
case ALGO_ANIME: register_anime_algo ( gate ); break;
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
case ALGO_BASTION: register_bastion_algo ( gate ); break;
case ALGO_BLAKE: register_blake_algo ( gate ); break;
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
case ALGO_C11: register_c11_algo ( gate ); break;
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
case ALGO_DECRED: register_decred_algo ( gate ); break;
case ALGO_DEEP: register_deep_algo ( gate ); break;
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
case ALGO_DROP: register_drop_algo ( gate ); break;
case ALGO_FRESH: register_fresh_algo ( gate ); break;
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
case ALGO_HODL: register_hodl_algo ( gate ); break;
case ALGO_JHA: register_jha_algo ( gate ); break;
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
case ALGO_LBRY: register_lbry_algo ( gate ); break;
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break;
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
case ALGO_M7M: register_m7m_algo ( gate ); break;
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
case ALGO_NIST5: register_nist5_algo ( gate ); break;
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
case ALGO_PHI1612: register_phi1612_algo ( gate ); break;
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
case ALGO_QUARK: register_quark_algo ( gate ); break;
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
case ALGO_SKEIN: register_skein_algo ( gate ); break;
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
case ALGO_SKUNK: register_skunk_algo ( gate ); break;
case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break;
case ALGO_TIMETRAVEL10: register_timetravel10_algo( gate ); break;
case ALGO_TRIBUS: register_tribus_algo ( gate ); break;
case ALGO_VANILLA: register_vanilla_algo ( gate ); break;
case ALGO_VELTOR: register_veltor_algo ( gate ); break;
case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break;
case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break;
case ALGO_X11: register_x11_algo ( gate ); break;
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
case ALGO_X12: register_x12_algo ( gate ); break;
case ALGO_X13: register_x13_algo ( gate ); break;
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
case ALGO_X14: register_x14_algo ( gate ); break;
case ALGO_X15: register_x15_algo ( gate ); break;
case ALGO_X16R: register_x16r_algo ( gate ); break;
case ALGO_X17: register_x17_algo ( gate ); break;
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
case ALGO_ZR5: register_zr5_algo ( gate ); break;
default:
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
return false;
@@ -229,6 +240,9 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
return true;
}
// restore warnings
#pragma GCC diagnostic pop
// override std defaults with jr2 defaults
bool register_json_rpc2( algo_gate_t *gate )
{
@@ -244,6 +258,7 @@ bool register_json_rpc2( algo_gate_t *gate )
gate->nonce_index = JR2_NONCE_INDEX;
jsonrpc_2 = true; // still needed
opt_extranonce = false;
// have_gbt = false;
return true;
}
@@ -256,41 +271,48 @@ void exec_hash_function( int algo, void *output, const void *pdata )
gate.hash( output, pdata, 0 );
}
// an algo can have multiple aliases but the aliases must be unique
#define PROPER (1)
#define ALIAS (0)
// The only difference between the alias and the proper algo name is the
// proper name s the one that is defined in ALGO_NAMES, there may be
// proper name is the one that is defined in ALGO_NAMES. There may be
// multiple aliases that map to the same proper name.
// New aliases can be added anywhere in the array as long as NULL is last.
// Alphabetic order of alias is recommended.
const char* const algo_alias_map[][2] =
{
// alias proper
{ "blake256r8", "blakecoin" },
{ "blake256r8vnl", "vanilla" },
{ "sia", "blake2b" },
{ "blake256r14", "blake" },
{ "cryptonote", "cryptonight" },
{ "cryptonight-light", "cryptolight" },
{ "dmd-gr", "groestl" },
{ "droplp", "drop" },
{ "espers", "hmq1725" },
{ "flax", "c11" },
{ "jane", "scryptjane" },
{ "lyra2", "lyra2re" },
{ "lyra2v2", "lyra2rev2" },
{ "lyra2zoin", "lyra2z330" },
{ "myriad", "myr-gr" },
{ "neo", "neoscrypt" },
{ "sib", "x11gost" },
{ "yes", "yescrypt" },
{ "ziftr", "zr5" },
{ "zcoin", "lyra2z" },
{ "zoin", "lyra2z330" },
{ NULL, NULL }
{ "bitcore", "timetravel10" },
{ "bitzeny", "yescryptr8" },
{ "blake256r8", "blakecoin" },
{ "blake256r8vnl", "vanilla" },
{ "blake256r14", "blake" },
{ "blake256r14dcr", "decred" },
{ "cryptonote", "cryptonight" },
{ "cryptonight-light", "cryptolight" },
{ "diamond", "dmd-gr" },
{ "droplp", "drop" },
{ "espers", "hmq1725" },
{ "flax", "c11" },
{ "hsr", "x13sm3" },
{ "jackpot", "jha" },
{ "jane", "scryptjane" },
{ "lyra2", "lyra2re" },
{ "lyra2v2", "lyra2rev2" },
{ "lyra2zoin", "lyra2z330" },
{ "myrgr", "myr-gr" },
{ "myriad", "myr-gr" },
{ "neo", "neoscrypt" },
{ "phi", "phi1612" },
// { "sia", "blake2b" },
{ "sib", "x11gost" },
{ "timetravel8", "timetravel" },
{ "ziftr", "zr5" },
{ "yenten", "yescryptr16" },
{ "yescryptr8k", "yescrypt" },
{ "zcoin", "lyra2z" },
{ "zoin", "lyra2z330" },
{ NULL, NULL }
};
// if arg is a valid alias for a known algo it is updated with the proper name.

View File

@@ -1,7 +1,6 @@
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include "miner.h"
/////////////////////////////
@@ -85,11 +84,13 @@
typedef uint32_t set_t;
#define EMPTY_SET 0
#define SSE2_OPT 1
#define AES_OPT 2
#define AVX_OPT 4
#define AVX2_OPT 8
#define EMPTY_SET 0
#define SSE2_OPT 1
#define AES_OPT 2
#define AVX_OPT 4
#define AVX2_OPT 8
#define SHA_OPT 0x10
//#define FOUR_WAY_OPT 0x20
// return set containing all elements from sets a & b
inline set_t set_union ( set_t a, set_t b ) { return a | b; }
@@ -110,7 +111,6 @@ int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t* );
// optional unsafe, must be overwritten if algo uses function
void ( *hash ) ( void*, const void*, uint32_t ) ;
void ( *hash_alt ) ( void*, const void*, uint32_t );
void ( *hash_suw ) ( void*, const void* );
//optional, safe to use default in most cases
@@ -130,7 +130,6 @@ void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
void ( *set_work_data_endian ) ( struct work* );
double ( *calc_network_diff ) ( struct work* );
//bool ( *prevent_dupes ) ( struct work*, struct stratum_ctx*, int );
bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
void ( *resync_threads ) ( struct work* );
bool ( *do_this_thread ) ( int );
@@ -157,7 +156,7 @@ bool return_false();
void *return_null();
void algo_not_tested();
void algo_not_implemented();
void four_way_not_tested();
// Warning: algo_gate.nonce_index should only be used in targetted code
// due to different behaviours by different targets. The JR2 index uses an
@@ -185,7 +184,6 @@ int null_scanhash();
// displays warning
void null_hash ();
void null_hash_alt();
void null_hash_suw();
// optional safe targets, default listed first unless noted.
@@ -214,21 +212,24 @@ int64_t get_max64_0x3fffffLL();
int64_t get_max64_0x1ffff();
int64_t get_max64_0xffffLL();
void std_set_target ( struct work *work, double job_diff );
void std_set_target( struct work *work, double job_diff );
void alt_set_target( struct work* work, double job_diff );
void scrypt_set_target( struct work *work, double job_diff );
bool std_work_decode( const json_t *val, struct work *work );
bool std_le_work_decode( const json_t *val, struct work *work );
bool std_be_work_decode( const json_t *val, struct work *work );
bool jr2_work_decode( const json_t *val, struct work *work );
bool std_submit_getwork_result( CURL *curl, struct work *work );
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
bool jr2_submit_getwork_result( CURL *curl, struct work *work );
void std_le_build_stratum_request( char *req, struct work *work );
void std_be_build_stratum_request( char *req, struct work *work );
void jr2_build_stratum_request ( char *req, struct work *work );
// set_work_data_endian target, default is do_nothing;
void swab_work_data( struct work *work );
// Default is do_nothing (assumed LE)
void set_work_data_big_endian( struct work *work );
double std_calc_network_diff( struct work *work );

View File

@@ -1,5 +1,3 @@
#include "miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

View File

@@ -1,203 +0,0 @@
#include <ccminer-config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <inttypes.h>
#include <unistd.h>
#include <math.h>
#include <sys/time.h>
#include <time.h>
#include <signal.h>
#include <curl/curl.h>
#include <miner.h>
#include "sia-rpc.h"
static bool sia_debug_diff = false;
extern int share_result(int result, int pooln, double sharediff, const char *reason);
/* compute nbits to get the network diff */
static void calc_network_diff(struct work *work)
{
uint32_t nbits = work->data[11]; // unsure if correct
uint32_t bits = (nbits & 0xffffff);
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
uint64_t diffone = 0x0000FFFF00000000ull;
double d = (double)0x0000ffff / (double)bits;
for (int m=shift; m < 29; m++) d *= 256.0;
for (int m=29; m < shift; m++) d /= 256.0;
if (sia_debug_diff)
applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits);
net_diff = d;
}
// ---- SIA LONGPOLL --------------------------------------------------------------------------------
struct data_buffer {
void *buf;
size_t len;
};
static size_t sia_data_cb(const void *ptr, size_t size, size_t nmemb,
void *user_data)
{
struct data_buffer *db = (struct data_buffer *)user_data;
size_t len = size * nmemb;
size_t oldlen, newlen;
void *newmem;
static const uchar zero = 0;
oldlen = db->len;
newlen = oldlen + len;
newmem = realloc(db->buf, newlen + 1);
if (!newmem)
return 0;
db->buf = newmem;
db->len = newlen;
memcpy((char*)db->buf + oldlen, ptr, len);
memcpy((char*)db->buf + newlen, &zero, 1); /* null terminate */
return len;
}
char* sia_getheader(CURL *curl, struct pool_infos *pool)
{
char curl_err_str[CURL_ERROR_SIZE] = { 0 };
struct data_buffer all_data = { 0 };
struct curl_slist *headers = NULL;
char data[256] = { 0 };
char url[512];
// nanopool
snprintf(url, 512, "%s/miner/header?address=%s&worker=%s", //&longpoll
pool->url, pool->user, pool->pass);
if (opt_protocol)
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_POST, 0);
curl_easy_setopt(curl, CURLOPT_ENCODING, "");
curl_easy_setopt(curl, CURLOPT_FAILONERROR, 0);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, opt_timeout);
curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_err_str);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, sia_data_cb);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &all_data);
headers = curl_slist_append(headers, "Accept: application/octet-stream");
headers = curl_slist_append(headers, "Expect:"); // disable Expect hdr
headers = curl_slist_append(headers, "User-Agent: Sia-Agent"); // required for now
// headers = curl_slist_append(headers, "User-Agent: " USER_AGENT);
// headers = curl_slist_append(headers, "X-Mining-Extensions: longpoll");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
int rc = curl_easy_perform(curl);
if (rc && strlen(curl_err_str)) {
applog(LOG_WARNING, "%s", curl_err_str);
}
if (all_data.len >= 112)
cbin2hex(data, (const char*) all_data.buf, 112);
if (opt_protocol || all_data.len != 112)
applog(LOG_DEBUG, "received %d bytes: %s", (int) all_data.len, data);
curl_slist_free_all(headers);
return rc == 0 && all_data.len ? strdup(data) : NULL;
}
bool sia_work_decode(const char *hexdata, struct work *work)
{
uint8_t target[32];
if (!work) return false;
hex2bin((uchar*)target, &hexdata[0], 32);
swab256(work->target, target);
work->targetdiff = target_to_diff(work->target);
hex2bin((uchar*)work->data, &hexdata[64], 80);
// high 16 bits of the 64 bits nonce
work->data[9] = rand() << 16;
// use work ntime as job id
cbin2hex(work->job_id, (const char*)&work->data[10], 4);
calc_network_diff(work);
if (stratum_diff != work->targetdiff) {
stratum_diff = work->targetdiff;
applog(LOG_WARNING, "Pool diff set to %g", stratum_diff);
}
return true;
}
bool sia_submit(CURL *curl, struct pool_infos *pool, struct work *work)
{
char curl_err_str[CURL_ERROR_SIZE] = { 0 };
struct data_buffer all_data = { 0 };
struct curl_slist *headers = NULL;
char buf[256] = { 0 };
char url[512];
if (opt_protocol)
applog_hex(work->data, 80);
//applog_hex(&work->data[8], 16);
//applog_hex(&work->data[10], 4);
// nanopool
snprintf(url, 512, "%s/miner/header?address=%s&worker=%s",
pool->url, pool->user, pool->pass);
if (opt_protocol)
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_ENCODING, "");
curl_easy_setopt(curl, CURLOPT_FAILONERROR, 0);
curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1);
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_err_str);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 10);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &all_data);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, sia_data_cb);
memcpy(buf, work->data, 80);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, 80);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, (void*) buf);
// headers = curl_slist_append(headers, "Content-Type: application/octet-stream");
// headers = curl_slist_append(headers, "Content-Length: 80");
headers = curl_slist_append(headers, "Accept:"); // disable Accept hdr
headers = curl_slist_append(headers, "Expect:"); // disable Expect hdr
headers = curl_slist_append(headers, "User-Agent: Sia-Agent");
// headers = curl_slist_append(headers, "User-Agent: " USER_AGENT);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
int res = curl_easy_perform(curl) == 0;
long errcode;
CURLcode c = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &errcode);
if (errcode != 204) {
if (strlen(curl_err_str))
applog(LOG_ERR, "submit err %ld %s", errcode, curl_err_str);
res = 0;
}
share_result(res, work->pooln, work->sharediff[0], res ? NULL : (char*) all_data.buf);
curl_slist_free_all(headers);
return true;
}
// ---- END SIA LONGPOLL ----------------------------------------------------------------------------

View File

@@ -1,6 +0,0 @@
#include <miner.h>
char* sia_getheader(CURL *curl, struct pool_infos *pool);
bool sia_work_decode(const char *hexdata, struct work *work);
bool sia_submit(CURL *curl, struct pool_infos *pool, struct work *work);

97
algo/blake/blake-4way.c Normal file
View File

@@ -0,0 +1,97 @@
#include "blake-gate.h"
#if defined (BLAKE_4WAY)
#include "blake-hash-4way.h"
#include <string.h>
#include <stdint.h>
#include <memory.h>
blake256r14_4way_context blake_ctx;
void blakehash_4way(void *state, const void *input)
{
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
blake256r14_4way_context ctx;
memcpy( &ctx, &blake_ctx, sizeof ctx );
blake256r14_4way( &ctx, input + (64<<2), 16 );
blake256r14_4way_close( &ctx, vhash );
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
}
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t HTarget = ptarget[7];
uint32_t _ALIGN(32) edata[20];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
if (opt_benchmark)
HTarget = 0x7f;
// we need big endian data...
swab32_array( edata, pdata, 20 );
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
blake256r14_4way_init( &blake_ctx );
blake256r14_4way( &blake_ctx, vdata, 64 );
uint32_t *noncep = vdata + 76; // 19*4
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep, n );
be32enc( noncep +1, n+1 );
be32enc( noncep +2, n+2 );
be32enc( noncep +3, n+3 );
blakehash_4way( hash, vdata );
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
pdata[19] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce)
&& !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
return num_found;
}
#endif

26
algo/blake/blake-gate.c Normal file
View File

@@ -0,0 +1,26 @@
#include "blake-gate.h"
int64_t blake_get_max64 ()
{
return 0x7ffffLL;
}
bool register_blake_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT;
gate->get_max64 = (void*)&blake_get_max64;
//#if defined (__AVX2__) && defined (FOUR_WAY)
// gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
// gate->scanhash = (void*)&scanhash_blake_8way;
// gate->hash = (void*)&blakehash_8way;
#if defined(BLAKE_4WAY)
four_way_not_tested();
gate->scanhash = (void*)&scanhash_blake_4way;
gate->hash = (void*)&blakehash_4way;
#else
gate->scanhash = (void*)&scanhash_blake;
gate->hash = (void*)&blakehash;
#endif
return true;
}

21
algo/blake/blake-gate.h Normal file
View File

@@ -0,0 +1,21 @@
#ifndef __BLAKE_GATE_H__
#define __BLAKE_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(__AVX2__)
#define BLAKE_4WAY
#endif
#if defined (BLAKE_4WAY)
void blakehash_4way(void *state, const void *input);
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
void blakehash( void *state, const void *input );
int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif

1499
algo/blake/blake-hash-4way.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,143 @@
/* $Id: sph_blake.h 252 2011-06-07 17:55:14Z tp $ */
/**
* BLAKE interface. BLAKE is a family of functions which differ by their
* output size; this implementation defines BLAKE for output sizes 224,
* 256, 384 and 512 bits. This implementation conforms to the "third
* round" specification.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_blake.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef __BLAKE_HASH_4WAY__
#define __BLAKE_HASH_4WAY__
#ifdef __AVX__
#ifdef __cplusplus
extern "C"{
#endif
#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
#define SPH_SIZE_blake256 256
#define SPH_SIZE_blake512 512
// With AVX only Blake-256 4 way is available.
// With AVX2 Blake-256 8way & Blake-512 4 way are also available.
// Blake-256 4 way
typedef struct {
__m128i buf[16] __attribute__ ((aligned (64)));
__m128i H[8];
__m128i S[4];
size_t ptr;
sph_u32 T0, T1;
int rounds; // 14 for blake, 8 for blakecoin & vanilla
} blake_4way_small_context;
// Default 14 rounds
typedef blake_4way_small_context blake256_4way_context;
void blake256_4way_init(void *cc);
void blake256_4way(void *cc, const void *data, size_t len);
void blake256_4way_close(void *cc, void *dst);
// 14 rounds, blake, decred
typedef blake_4way_small_context blake256r14_4way_context;
void blake256r14_4way_init(void *cc);
void blake256r14_4way(void *cc, const void *data, size_t len);
void blake256r14_4way_close(void *cc, void *dst);
// 8 rounds, blakecoin, vanilla
typedef blake_4way_small_context blake256r8_4way_context;
void blake256r8_4way_init(void *cc);
void blake256r8_4way(void *cc, const void *data, size_t len);
void blake256r8_4way_close(void *cc, void *dst);
#ifdef __AVX2__
// Blake-256 8 way
typedef struct {
__m256i buf[16] __attribute__ ((aligned (64)));
__m256i H[8];
__m256i S[4];
size_t ptr;
sph_u32 T0, T1;
int rounds; // 14 for blake, 8 for blakecoin & vanilla
} blake_8way_small_context;
// Default 14 rounds
typedef blake_8way_small_context blake256_8way_context;
void blake256_8way_init(void *cc);
void blake256_8way(void *cc, const void *data, size_t len);
void blake256_8way_close(void *cc, void *dst);
// 14 rounds, blake, decred
typedef blake_8way_small_context blake256r14_8way_context;
void blake256r14_8way_init(void *cc);
void blake256r14_8way(void *cc, const void *data, size_t len);
void blake256r14_8way_close(void *cc, void *dst);
// 8 rounds, blakecoin, vanilla
typedef blake_8way_small_context blake256r8_8way_context;
void blake256r8_8way_init(void *cc);
void blake256r8_8way(void *cc, const void *data, size_t len);
void blake256r8_8way_close(void *cc, void *dst);
// Blake-512 4 way
typedef struct {
__m256i buf[16] __attribute__ ((aligned (64)));
__m256i H[8];
__m256i S[4];
size_t ptr;
sph_u64 T0, T1;
} blake_4way_big_context;
typedef blake_4way_big_context blake512_4way_context;
void blake512_4way_init(void *cc);
void blake512_4way(void *cc, const void *data, size_t len);
void blake512_4way_close(void *cc, void *dst);
void blake512_4way_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#endif
#ifdef __cplusplus
}
#endif
#endif
#endif

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include "sph_blake.h"
@@ -21,7 +20,7 @@ void blakehash(void *state, const void *input)
{
sph_blake256_context ctx;
uint8_t hash[64];
uint8_t hash[64] __attribute__ ((aligned (32)));
uint8_t *ending = (uint8_t*) input;
ending += 64;
@@ -90,19 +89,3 @@ int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
return 0;
}
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blake_get_max64 ()
{
return 0x7ffffLL;
}
bool register_blake_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_blake;
gate->hash = (void*)&blakehash;
gate->hash_alt = (void*)&blakehash;
gate->get_max64 = (void*)&blake_get_max64;
return true;
}

View File

@@ -3,22 +3,20 @@
* tpruvot@github 2015-2016
*/
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>
#include <stdint.h>
#include "algo/blake/sph_blake2b.h"
static __thread sph_blake2b_ctx s_midstate;
static __thread sph_blake2b_ctx s_ctx;
//static __thread sph_blake2b_ctx s_midstate;
//static __thread sph_blake2b_ctx s_ctx;
#define MIDLEN 76
#define A 64
void blake2b_hash(void *output, const void *input)
{
uint8_t _ALIGN(A) hash[32];
sph_blake2b_ctx ctx;
sph_blake2b_ctx ctx __attribute__ ((aligned (64)));
sph_blake2b_init(&ctx, 32, NULL, 0);
sph_blake2b_update(&ctx, input, 80);
@@ -27,6 +25,7 @@ void blake2b_hash(void *output, const void *input)
memcpy(output, hash, 32);
}
/*
static void blake2b_hash_end(uint32_t *output, const uint32_t *input)
{
s_ctx.outlen = MIDLEN;
@@ -34,6 +33,7 @@ static void blake2b_hash_end(uint32_t *output, const uint32_t *input)
sph_blake2b_update(&s_ctx, (uint8_t*) &input[MIDLEN/4], 80 - MIDLEN);
sph_blake2b_final(&s_ctx, (uint8_t*) output);
}
*/
int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
@@ -44,7 +44,6 @@ int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
// const uint32_t first_nonce = pdata[19];
const uint32_t first_nonce = pdata[8];
uint32_t n = first_nonce;
@@ -59,7 +58,6 @@ int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
//memcpy(&s_ctx, &s_midstate, sizeof(blake2b_ctx));
do {
// be32enc(&endiandata[19], n);
be32enc(&endiandata[8], n);
//blake2b_hash_end(vhashcpu, endiandata);
blake2b_hash(vhashcpu, endiandata);
@@ -67,7 +65,6 @@ int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
if (vhashcpu[7] < Htarg && fulltest(vhashcpu, ptarget)) {
work_set_target_ratio(work, vhashcpu);
*hashes_done = n - first_nonce + 1;
// pdata[19] = n;
pdata[8] = n;
return 1;
}
@@ -75,7 +72,6 @@ int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
// pdata[19] = n;
pdata[8] = n;
return 0;
@@ -173,8 +169,8 @@ void blake2b_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
if ( memcmp( &work->data[ wkcmp_off ], &g_work->data[ wkcmp_off ], wkcmp_sz )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr ) )
|| strcmp( work->job_id, g_work->job_id ) )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|| strcmp( work->job_id, g_work->job_id ) ) )
{
work_free( work );
work_copy( work, g_work );
@@ -223,6 +219,8 @@ bool register_blake2b_algo( algo_gate_t* gate )
gate->hash = (void*)&blake2b_hash;
gate->calc_network_diff = (void*)&blake2b_calc_network_diff;
gate->build_stratum_request = (void*)&blake2b_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->build_extraheader = (void*)&blake2b_build_extraheader;
gate->get_new_work = (void*)&blake2b_get_new_work;
gate->get_max64 = (void*)&blake2b_get_max64;

View File

@@ -1,10 +1,9 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>
#include <stdint.h>
#include "crypto/blake2s.h"
#include "sph-blake2s.h"
static __thread blake2s_state s_midstate;
static __thread blake2s_state s_ctx;
@@ -13,7 +12,7 @@ static __thread blake2s_state s_ctx;
void blake2s_hash(void *output, const void *input)
{
unsigned char _ALIGN(64) hash[BLAKE2S_OUTBYTES];
blake2s_state blake2_ctx;
blake2s_state blake2_ctx __attribute__ ((aligned (64)));
blake2s_init(&blake2_ctx, BLAKE2S_OUTBYTES);
blake2s_update(&blake2_ctx, input, 80);

106
algo/blake/blakecoin-4way.c Normal file
View File

@@ -0,0 +1,106 @@
#include "blakecoin-gate.h"
#if defined (BLAKECOIN_4WAY)
#include "blake-hash-4way.h"
#include <string.h>
#include <stdint.h>
#include <memory.h>
blake256r8_4way_context blakecoin_ctx;
void blakecoin_4way_hash(void *state, const void *input)
{
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
blake256r8_4way_context ctx;
memcpy( &ctx, &blakecoin_ctx, sizeof ctx );
blake256r8_4way( &ctx, input + (64<<2), 16 );
blake256r8_4way_close( &ctx, vhash );
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
}
int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t HTarget = ptarget[7];
uint32_t _ALIGN(32) edata[20];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
if (opt_benchmark)
HTarget = 0x7f;
// we need big endian data...
swab32_array( edata, pdata, 20 );
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
blake256r8_4way_init( &blakecoin_ctx );
blake256r8_4way( &blakecoin_ctx, vdata, 64 );
uint32_t *noncep = vdata + 76; // 19*4
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep, n );
be32enc( noncep +1, n+1 );
be32enc( noncep +2, n+2 );
be32enc( noncep +3, n+3 );
blakecoin_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce)
&& !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
// workaround to prevent flood of hash reports when nonce range exhasuted
// and thread is spinning waiting for new work
if ( ( n >= max_nonce ) && ( *hashes_done < 10 ) )
{
*hashes_done = 0;
// sleep(1);
}
return num_found;
}
#endif

View File

@@ -0,0 +1,70 @@
#include "blakecoin-gate.h"
#include <memory.h>
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blakecoin_get_max64 ()
{
return 0x7ffffLL;
// return 0x3fffffLL;
}
// Blakecoin 4 way hashes so fast it runs out of nonces.
// This is an attempt to solve this but the result may be
// to rehash old nonces until new work is received.
void bc4w_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t *end_nonce_ptr, bool clean_job )
{
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
// if ( have_stratum && ( *nonceptr >= *end_nonce_ptr ) )
// algo_gate.stratum_gen_work( &stratum, g_work );
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
|| ( *nonceptr >= *end_nonce_ptr )
|| ( ( work->job_id != g_work->job_id ) && clean_job ) )
/*
if ( memcmp( work->data, g_work->data, algo_gate.work_cmp_size )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|| ( work->job_id != g_work->job_id ) ) )
*/
{
work_free( work );
work_copy( work, g_work );
*nonceptr = 0xffffffffU / opt_n_threads * thr_id;
if ( opt_randomize )
*nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads;
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
// try incrementing the xnonce to chsnge the data
// for ( int i = 0; i < work->xnonce2_size && !( ++work->xnonce2[i] ); i++ );
}
else
++(*nonceptr);
}
// vanilla uses default gen merkle root, otherwise identical to blakecoin
bool register_vanilla_algo( algo_gate_t* gate )
{
#if defined(BLAKECOIN_4WAY)
// four_way_not_tested();
gate->scanhash = (void*)&scanhash_blakecoin_4way;
gate->hash = (void*)&blakecoin_4way_hash;
// gate->get_new_work = (void*)&bc4w_get_new_work;
// blakecoin_4way_init( &blake_4way_init_ctx );
#else
gate->scanhash = (void*)&scanhash_blakecoin;
gate->hash = (void*)&blakecoinhash;
// blakecoin_init( &blake_init_ctx );
#endif
gate->optimizations = AVX2_OPT;
gate->get_max64 = (void*)&blakecoin_get_max64;
return true;
}
bool register_blakecoin_algo( algo_gate_t* gate )
{
register_vanilla_algo( gate );
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
return true;
}

View File

@@ -0,0 +1,21 @@
#ifndef __BLAKECOIN_GATE_H__
#define __BLAKECOIN_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(__AVX2__)
#define BLAKECOIN_4WAY
#endif
#if defined (BLAKECOIN_4WAY)
void blakecoin_4way_hash(void *state, const void *input);
int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
void blakecoinhash( void *state, const void *input );
int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif

View File

@@ -1,5 +1,4 @@
#include "miner.h"
#include "algo-gate-api.h"
#include "blakecoin-gate.h"
#define BLAKE32_ROUNDS 8
#include "sph_blake.h"
@@ -30,7 +29,7 @@ static void blake_midstate_init( const void* input )
void blakecoinhash( void *state, const void *input )
{
sph_blake256_context ctx;
uint8_t hash[64];
uint8_t hash[64] __attribute__ ((aligned (32)));
uint8_t *ending = (uint8_t*) input + 64;
// copy cached midstate
@@ -93,11 +92,13 @@ int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
return 0;
}
/*
void blakecoin_gen_merkle_root ( char* merkle_root, struct stratum_ctx* sctx )
{
SHA256( sctx->job.coinbase, (int)sctx->job.coinbase_size, merkle_root );
}
*/
/*
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
int64_t blakecoin_get_max64 ()
{
@@ -109,7 +110,6 @@ bool register_vanilla_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_blakecoin;
gate->hash = (void*)&blakecoinhash;
gate->hash_alt = (void*)&blakecoinhash;
gate->get_max64 = (void*)&blakecoin_get_max64;
blakecoin_init( &blake_init_ctx );
return true;
@@ -121,4 +121,4 @@ bool register_blakecoin_algo( algo_gate_t* gate )
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
return true;
}
*/

101
algo/blake/decred-4way.c Normal file
View File

@@ -0,0 +1,101 @@
#include "decred-gate.h"
#include "blake-hash-4way.h"
#include <string.h>
#include <stdint.h>
#include <memory.h>
#include <unistd.h>
#if defined (DECRED_4WAY)
static __thread blake256_4way_context blake_mid;
void decred_hash_4way( void *state, const void *input )
{
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
// uint32_t hash0[8] __attribute__ ((aligned (32)));
// uint32_t hash1[8] __attribute__ ((aligned (32)));
// uint32_t hash2[8] __attribute__ ((aligned (32)));
// uint32_t hash3[8] __attribute__ ((aligned (32)));
const void *tail = input + ( DECRED_MIDSTATE_LEN << 2 );
int tail_len = 180 - DECRED_MIDSTATE_LEN;
blake256_4way_context ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &blake_mid, sizeof(blake_mid) );
blake256_4way( &ctx, tail, tail_len );
blake256_4way_close( &ctx, vhash );
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
}
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done)
{
uint32_t vdata[48*4] __attribute__ ((aligned (64)));
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t _ALIGN(64) edata[48];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
uint32_t n = first_nonce;
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
// copy to buffer guaranteed to be aligned.
memcpy( edata, pdata, 180 );
// use the old way until new way updated for size.
mm_interleave_4x32x( vdata, edata, edata, edata, edata, 180*8 );
blake256_4way_init( &blake_mid );
blake256_4way( &blake_mid, vdata, DECRED_MIDSTATE_LEN );
uint32_t *noncep = vdata + DECRED_NONCE_INDEX * 4;
do {
found[0] = found[1] = found[2] = found[3] = false;
* noncep = n;
*(noncep+1) = n+1;
*(noncep+2) = n+2;
*(noncep+3) = n+3;
decred_hash_4way( hash, vdata );
if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
{
work_set_target_ratio( work, hash );
found[0] = true;
num_found++;
nonces[0] = n;
pdata[DECRED_NONCE_INDEX] = n;
}
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
{
work_set_target_ratio( work, hash+8 );
found[1] = true;
num_found++;
nonces[1] = n+1;
}
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
{
work_set_target_ratio( work, hash+16 );
found[2] = true;
num_found++;
nonces[2] = n+2;
}
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
{
work_set_target_ratio( work, hash+24 );
found[3] = true;
num_found++;
nonces[3] = n+3;
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce)
&& !work_restart[thr_id].restart );
*hashes_done = n - first_nonce + 1;
return num_found;
}
#endif

172
algo/blake/decred-gate.c Normal file
View File

@@ -0,0 +1,172 @@
#include "decred-gate.h"
#include <unistd.h>
#include <memory.h>
#include <string.h>
uint32_t *decred_get_nonceptr( uint32_t *work_data )
{
return &work_data[ DECRED_NONCE_INDEX ];
}
double decred_calc_network_diff( struct work* work )
{
// sample for diff 43.281 : 1c05ea29
// todo: endian reversed on longpoll could be zr5 specific...
uint32_t nbits = work->data[ DECRED_NBITS_INDEX ];
uint32_t bits = ( nbits & 0xffffff );
int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
int m;
double d = (double)0x0000ffff / (double)bits;
for ( m = shift; m < 29; m++ )
d *= 256.0;
for ( m = 29; m < shift; m++ )
d /= 256.0;
if ( shift == 28 )
d *= 256.0; // testnet
if ( opt_debug_diff )
applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d,
shift, bits );
return net_diff;
}
void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
{
// some random extradata to make the work unique
work->data[ DECRED_XNONCE_INDEX ] = (rand()*4);
work->height = work->data[32];
if (!have_longpoll && work->height > *net_blocks + 1)
{
char netinfo[64] = { 0 };
if (opt_showdiff && net_diff > 0.)
{
if (net_diff != work->targetdiff)
sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
work->targetdiff);
else
sprintf(netinfo, ", diff %.3f", net_diff);
}
applog(LOG_BLUE, "%s block %d%s", algo_names[opt_algo], work->height,
netinfo);
*net_blocks = work->height - 1;
}
}
void decred_be_build_stratum_request( char *req, struct work *work,
struct stratum_ctx *sctx )
{
unsigned char *xnonce2str;
uint32_t ntime, nonce;
char ntimestr[9], noncestr[9];
be32enc( &ntime, work->data[ DECRED_NTIME_INDEX ] );
be32enc( &nonce, work->data[ DECRED_NONCE_INDEX ] );
bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) );
bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) );
xnonce2str = abin2hex( (char*)( &work->data[ DECRED_XNONCE_INDEX ] ),
sctx->xnonce1_size );
snprintf( req, JSON_BUF_LEN,
"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
free(xnonce2str);
}
#define min(a,b) (a>b ? (b) :(a))
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
uchar merkle_root[64] = { 0 };
uint32_t extraheader[32] = { 0 };
int headersize = 0;
uint32_t* extradata = (uint32_t*) sctx->xnonce1;
size_t t;
int i;
// getwork over stratum, getwork merkle + header passed in coinb1
memcpy(merkle_root, sctx->job.coinbase, 32);
headersize = min((int)sctx->job.coinbase_size - 32,
sizeof(extraheader) );
memcpy( extraheader, &sctx->job.coinbase[32], headersize );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version );
for ( i = 0; i < 8; i++ )
g_work->data[1 + i] = swab32(
le32dec( (uint32_t *) sctx->job.prevhash + i ) );
for ( i = 0; i < 8; i++ )
g_work->data[9 + i] = swab32( be32dec( (uint32_t *) merkle_root + i ) );
// for ( i = 0; i < 8; i++ ) // prevhash
// g_work->data[1 + i] = swab32( g_work->data[1 + i] );
// for ( i = 0; i < 8; i++ ) // merkle
// g_work->data[9 + i] = swab32( g_work->data[9 + i] );
for ( i = 0; i < headersize/4; i++ ) // header
g_work->data[17 + i] = extraheader[i];
// extradata
for ( i = 0; i < sctx->xnonce1_size/4; i++ )
g_work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
g_work->data[i] = 0;
g_work->data[37] = (rand()*4) << 8;
// block header suffix from coinb2 (stake version)
memcpy( &g_work->data[44],
&sctx->job.coinbase[ sctx->job.coinbase_size-4 ], 4 );
sctx->bloc_height = g_work->data[32];
//applog_hex(work->data, 180);
//applog_hex(&work->data[36], 36);
}
#undef min
bool decred_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
int thr_id )
{
if ( have_stratum && strcmp(stratum->job.job_id, work->job_id) )
// need to regen g_work..
return false;
if ( have_stratum && !work->data[0] && !opt_benchmark )
{
sleep(1);
return false;
}
// extradata: prevent duplicates
work->data[ DECRED_XNONCE_INDEX ] += 1;
work->data[ DECRED_XNONCE_INDEX + 1 ] |= thr_id;
return true;
}
bool register_decred_algo( algo_gate_t* gate )
{
#if defined(DECRED_4WAY)
four_way_not_tested();
gate->scanhash = (void*)&scanhash_decred_4way;
gate->hash = (void*)&decred_hash_4way;
#else
gate->scanhash = (void*)&scanhash_decred;
gate->hash = (void*)&decred_hash;
#endif
gate->optimizations = AVX2_OPT;
gate->get_nonceptr = (void*)&decred_get_nonceptr;
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
gate->display_extra_data = (void*)&decred_decode_extradata;
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->build_extraheader = (void*)&decred_build_extraheader;
gate->ready_to_mine = (void*)&decred_ready_to_mine;
gate->nbits_index = DECRED_NBITS_INDEX;
gate->ntime_index = DECRED_NTIME_INDEX;
gate->nonce_index = DECRED_NONCE_INDEX;
gate->work_data_size = DECRED_DATA_SIZE;
gate->work_cmp_size = DECRED_WORK_COMPARE_SIZE;
allow_mininginfo = false;
have_gbt = false;
return true;
}

36
algo/blake/decred-gate.h Normal file
View File

@@ -0,0 +1,36 @@
#ifndef __DECRED_GATE_H__
#define __DECRED_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
#define DECRED_NBITS_INDEX 29
#define DECRED_NTIME_INDEX 34
#define DECRED_NONCE_INDEX 35
#define DECRED_XNONCE_INDEX 36
#define DECRED_DATA_SIZE 192
#define DECRED_WORK_COMPARE_SIZE 140
#define DECRED_MIDSTATE_LEN 128
#if defined (__AVX2__)
//void blakehash_84way(void *state, const void *input);
//int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done );
#endif
#if defined(__AVX2__)
#define DECRED_4WAY
#endif
#if defined (DECRED_4WAY)
void decred_hash_4way(void *state, const void *input);
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
void decred_hash( void *state, const void *input );
int scanhash_decred( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif

View File

@@ -1,10 +1,11 @@
#include "miner.h"
#include "algo-gate-api.h"
#include "decred-gate.h"
#include "sph_blake.h"
#include <string.h>
#include <stdint.h>
#include <memory.h>
#include <unistd.h>
/*
#ifndef min
#define min(a,b) (a>b ? b : a)
@@ -13,33 +14,33 @@
#define max(a,b) (a<b ? b : a)
#endif
*/
/*
#define DECRED_NBITS_INDEX 29
#define DECRED_NTIME_INDEX 34
#define DECRED_NONCE_INDEX 35
#define DECRED_XNONCE_INDEX 36
#define DECRED_DATA_SIZE 192
#define DECRED_WORK_COMPARE_SIZE 140
*/
static __thread sph_blake256_context blake_mid;
static __thread bool ctx_midstate_done = false;
void decred_hash(void *state, const void *input)
{
#define MIDSTATE_LEN 128
sph_blake256_context ctx;
// #define MIDSTATE_LEN 128
sph_blake256_context ctx __attribute__ ((aligned (64)));
uint8_t *ending = (uint8_t*) input;
ending += MIDSTATE_LEN;
ending += DECRED_MIDSTATE_LEN;
if (!ctx_midstate_done) {
sph_blake256_init(&blake_mid);
sph_blake256(&blake_mid, input, MIDSTATE_LEN);
sph_blake256(&blake_mid, input, DECRED_MIDSTATE_LEN);
ctx_midstate_done = true;
}
memcpy(&ctx, &blake_mid, sizeof(blake_mid));
sph_blake256(&ctx, ending, (180 - MIDSTATE_LEN));
sph_blake256(&ctx, ending, (180 - DECRED_MIDSTATE_LEN));
sph_blake256_close(&ctx, state);
}
@@ -53,14 +54,14 @@ void decred_hash_simple(void *state, const void *input)
int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
{
uint32_t _ALIGN(128) endiandata[48];
uint32_t _ALIGN(128) hash32[8];
uint32_t _ALIGN(64) endiandata[48];
uint32_t _ALIGN(64) hash32[8];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
#define DCR_NONCE_OFT32 35
// #define DCR_NONCE_OFT32 35
const uint32_t first_nonce = pdata[DCR_NONCE_OFT32];
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
uint32_t n = first_nonce;
@@ -80,7 +81,7 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
do {
//be32enc(&endiandata[DCR_NONCE_OFT32], n);
endiandata[DCR_NONCE_OFT32] = n;
endiandata[DECRED_NONCE_INDEX] = n;
decred_hash(hash32, endiandata);
if (hash32[7] <= HTarget && fulltest(hash32, ptarget)) {
@@ -91,7 +92,7 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
applog_hash(ptarget);
applog_compare_hash(hash32, ptarget);
#endif
pdata[DCR_NONCE_OFT32] = n;
pdata[DECRED_NONCE_INDEX] = n;
return 1;
}
@@ -100,24 +101,17 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[DCR_NONCE_OFT32] = n;
pdata[DECRED_NONCE_INDEX] = n;
return 0;
}
/*
uint32_t *decred_get_nonceptr( uint32_t *work_data )
{
return &work_data[ DECRED_NONCE_INDEX ];
}
// does decred need a custom stratum_get_g_work to fix nicehash
// bad extranonce2 size?
//
// does decred need a custom init_nonce?
// does it need to increment nonce, seems not because gen_work_now always
// returns true
double decred_calc_network_diff( struct work* work )
//void decred_calc_network_diff( struct work* work )
{
// sample for diff 43.281 : 1c05ea29
// todo: endian reversed on longpoll could be zr5 specific...
@@ -179,7 +173,7 @@ void decred_be_build_stratum_request( char *req, struct work *work,
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
free(xnonce2str);
}
*/
/*
// data shared between gen_merkle_root and build_extraheader.
__thread uint32_t decred_extraheader[32] = { 0 };
@@ -195,6 +189,7 @@ void decred_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
}
*/
/*
#define min(a,b) (a>b ? (b) :(a))
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
@@ -232,11 +227,15 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
for ( i = 0; i < headersize/4; i++ ) // header
g_work->data[17 + i] = extraheader[i];
// extradata
for ( i = 0; i < sctx->xnonce1_size/4; i++ )
g_work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
g_work->data[i] = 0;
g_work->data[37] = (rand()*4) << 8;
// block header suffix from coinb2 (stake version)
memcpy( &g_work->data[44],
&sctx->job.coinbase[ sctx->job.coinbase_size-4 ], 4 );
sctx->bloc_height = g_work->data[32];
//applog_hex(work->data, 180);
//applog_hex(&work->data[36], 36);
@@ -244,21 +243,6 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
#undef min
/*
bool decred_prevent_dupes( struct work* work, struct stratum_ctx* stratum,
int thr_id )
{
return false;
if ( have_stratum && strcmp(stratum->job.job_id, work->job_id) )
// need to regen g_work..
return true;
// extradata: prevent duplicates
work->data[ DECRED_XNONCE_INDEX ] += 1;
work->data[ DECRED_XNONCE_INDEX + 1 ] |= thr_id;
return false;
}
*/
bool decred_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
int thr_id )
{
@@ -282,14 +266,13 @@ bool register_decred_algo( algo_gate_t* gate )
gate->optimizations = SSE2_OPT;
gate->scanhash = (void*)&scanhash_decred;
gate->hash = (void*)&decred_hash;
gate->hash_alt = (void*)&decred_hash;
gate->get_nonceptr = (void*)&decred_get_nonceptr;
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
gate->display_extra_data = (void*)&decred_decode_extradata;
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
// gate->gen_merkle_root = (void*)&decred_gen_merkle_root;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->build_extraheader = (void*)&decred_build_extraheader;
// gate->prevent_dupes = (void*)&decred_prevent_dupes;
gate->ready_to_mine = (void*)&decred_ready_to_mine;
gate->nbits_index = DECRED_NBITS_INDEX;
gate->ntime_index = DECRED_NTIME_INDEX;
@@ -300,4 +283,4 @@ bool register_decred_algo( algo_gate_t* gate )
have_gbt = false;
return true;
}
*/

View File

@@ -0,0 +1,207 @@
#include "pentablake-gate.h"
#if defined (__AVX2__)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake-hash-4way.h"
#include "sph_blake.h"
//#define DEBUG_ALGO
extern void pentablakehash_4way( void *output, const void *input )
{
unsigned char _ALIGN(32) hash[128];
// // same as uint32_t hashA[16], hashB[16];
// #define hashB hash+64
uint64_t hash0[8] __attribute__ ((aligned (64)));
uint64_t hash1[8] __attribute__ ((aligned (64)));
uint64_t hash2[8] __attribute__ ((aligned (64)));
uint64_t hash3[8] __attribute__ ((aligned (64)));
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
blake512_4way_context ctx;
blake512_4way_init( &ctx );
blake512_4way( &ctx, input, 80 );
blake512_4way_close( &ctx, vhash );
uint64_t sin0[10], sin1[10], sin2[10], sin3[10];
mm256_deinterleave_4x64( sin0, sin1, sin2, sin3, input, 640 );
sph_blake512_context ctx2_blake;
sph_blake512_init(&ctx2_blake);
sph_blake512(&ctx2_blake, sin0, 80);
sph_blake512_close(&ctx2_blake, (void*) hash);
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
uint64_t* hash64 = (uint64_t*)hash;
for( int i = 0; i < 8; i++ )
{
if ( hash0[i] != hash64[i] )
printf("hash mismatch %u\n",i);
}
blake512_4way_init( &ctx );
blake512_4way( &ctx, vhash, 64 );
blake512_4way_close( &ctx, vhash );
blake512_4way_init( &ctx );
blake512_4way( &ctx, vhash, 64 );
blake512_4way_close( &ctx, vhash );
blake512_4way_init( &ctx );
blake512_4way( &ctx, vhash, 64 );
blake512_4way_close( &ctx, vhash );
blake512_4way_init( &ctx );
blake512_4way( &ctx, vhash, 64 );
blake512_4way_close( &ctx, vhash );
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
memcpy( output, hash0, 32 );
memcpy( output+32, hash1, 32 );
memcpy( output+64, hash2, 32 );
memcpy( output+96, hash3, 32 );
/*
uint64_t sin0[10] __attribute__ ((aligned (64)));
uint64_t sin1[10] __attribute__ ((aligned (64)));
uint64_t sin2[10] __attribute__ ((aligned (64)));
uint64_t sin3[10] __attribute__ ((aligned (64)));
sph_blake512_context ctx_blake;
sph_blake512_init(&ctx_blake);
sph_blake512(&ctx_blake, input, 80);
sph_blake512_close(&ctx_blake, hash);
sph_blake512_init(&ctx_blake);
sph_blake512(&ctx_blake, hash, 64);
sph_blake512_close(&ctx_blake, hash);
sph_blake512_init(&ctx_blake);
sph_blake512(&ctx_blake, hash, 64);
sph_blake512_close(&ctx_blake, hash);
sph_blake512_init(&ctx_blake);
sph_blake512(&ctx_blake, hash, 64);
sph_blake512_close(&ctx_blake, hash);
sph_blake512_init(&ctx_blake);
sph_blake512(&ctx_blake, hash, 64);
sph_blake512_close(&ctx_blake, hash);
memcpy(output, hash, 32);
*/
}
int scanhash_pentablake_4way( int thr_id, struct work *work,
uint32_t max_nonce, uint64_t *hashes_done )
{
uint32_t hash[4*8] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t endiandata[32] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 73; // 9*8 + 1
uint32_t *noncep1 = vdata + 75;
uint32_t *noncep2 = vdata + 77;
uint32_t *noncep3 = vdata + 79;
// uint32_t _ALIGN(32) hash64[8];
// uint32_t _ALIGN(32) endiandata[32];
uint64_t htmax[] = {
0,
0xF,
0xFF,
0xFFF,
0xFFFF,
0x10000000
};
uint32_t masks[] = {
0xFFFFFFFF,
0xFFFFFFF0,
0xFFFFFF00,
0xFFFFF000,
0xFFFF0000,
0
};
// we need bigendian data...
swab32_array( endiandata, pdata, 20 );
uint64_t *edata = (uint64_t*)endiandata;
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
for ( int m=0; m < 6; m++ )
{
if ( Htarg <= htmax[m] )
{
uint32_t mask = masks[m];
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
pentablakehash_4way( hash, vdata );
// return immediately on nonce found, only one submit
if ( ( !(hash[7] & mask) ) && fulltest( hash, ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
pdata[19] = n;
*hashes_done = n - first_nonce + 1;
return 1;
}
if ( (! ((hash+8)[7] & mask) ) && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n;
*hashes_done = n - first_nonce + 1;
return 1;
}
if ( ( !((hash+16)[7] & mask) ) && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n;
*hashes_done = n - first_nonce + 1;
return 1;
}
if ( ( !((hash+24)[7] & mask) ) && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n;
*hashes_done = n - first_nonce + 1;
return 1;
}
n += 4;
} while (n < max_nonce && !work_restart[thr_id].restart);
break;
}
}
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
#endif

View File

@@ -0,0 +1,16 @@
#include "pentablake-gate.h"
bool register_pentablake_algo( algo_gate_t* gate )
{
#if defined (PENTABLAKE_4WAY)
gate->scanhash = (void*)&scanhash_pentablake_4way;
gate->hash = (void*)&pentablakehash_4way;
#else
gate->scanhash = (void*)&scanhash_pentablake;
gate->hash = (void*)&pentablakehash;
#endif
gate->optimizations = AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -0,0 +1,21 @@
#ifndef __PENTABLAKE_GATE_H__
#define __PENTABLAKE_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(__AVX2__)
#define PENTABLAKE_4WAY
#endif
#if defined(PENTABLAKE_4WAY)
void pentablakehash_4way( void *state, const void *input );
int scanhash_pentablake_4way( int thr_id, struct work *work,
uint32_t max_nonce, uint64_t *hashes_done );
#endif
void pentablakehash( void *state, const void *input );
int scanhash_pentablake( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif

View File

@@ -1,5 +1,4 @@
#include "miner.h"
#include "algo-gate-api.h"
#include "pentablake-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -111,11 +110,3 @@ int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce,
return 0;
}
bool register_pentablake_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_pentablake;
gate->hash = (void*)&pentablakehash;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -15,8 +15,8 @@
#include <string.h>
#include <stdio.h>
#include "algo/sha3/sph_types.h"
#include "crypto/blake2s.h"
#include "algo/sha/sph_types.h"
#include "sph-blake2s.h"
static const uint32_t blake2s_IV[8] =
{

View File

@@ -813,6 +813,7 @@ blake32(sph_blake_small_context *sc, const void *data, size_t len)
buf = sc->buf;
ptr = sc->ptr;
if (len < (sizeof sc->buf) - ptr) {
memcpy(buf + ptr, data, len);
ptr += len;
@@ -871,6 +872,7 @@ blake32_close(sph_blake_small_context *sc,
} else {
sc->T0 -= 512 - bit_len;
}
if (bit_len <= 446) {
memset(u.buf + ptr + 1, 0, 55 - ptr);
if (out_size_w32 == 8)
@@ -890,9 +892,9 @@ blake32_close(sph_blake_small_context *sc,
sph_enc32be_aligned(u.buf + 60, tl);
blake32(sc, u.buf, 64);
}
out = dst;
for (k = 0; k < out_size_w32; k ++)
sph_enc32be(out + (k << 2), sc->H[k]);
out = dst;
for (k = 0; k < out_size_w32; k ++)
sph_enc32be(out + (k << 2), sc->H[k]);
}
#if SPH_64
@@ -982,9 +984,11 @@ blake64_close(sph_blake_big_context *sc,
u.buf[111] |= 1;
sph_enc64be_aligned(u.buf + 112, th);
sph_enc64be_aligned(u.buf + 120, tl);
blake64(sc, u.buf + ptr, 128 - ptr);
} else {
memset(u.buf + ptr + 1, 0, 127 - ptr);
blake64(sc, u.buf + ptr, 128 - ptr);
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
@@ -993,6 +997,7 @@ blake64_close(sph_blake_big_context *sc,
u.buf[111] = 1;
sph_enc64be_aligned(u.buf + 112, th);
sph_enc64be_aligned(u.buf + 120, tl);
blake64(sc, u.buf, 128);
}
out = dst;

View File

@@ -42,7 +42,7 @@ extern "C"{
#endif
#include <stddef.h>
#include "algo/sha3/sph_types.h"
#include "algo/sha/sph_types.h"
/**
* Output size (in bits) for BLAKE-224.

View File

@@ -31,7 +31,7 @@
#include <stdint.h>
#include <string.h>
#include "algo/sha3/sph_types.h"
#include "algo/sha/sph_types.h"
#include "sph_blake2b.h"
// Cyclic right rotation.

1168
algo/bmw/bmw-hash-4way.c Normal file

File diff suppressed because it is too large Load Diff

95
algo/bmw/bmw-hash-4way.h Normal file
View File

@@ -0,0 +1,95 @@
/* $Id: sph_bmw.h 216 2010-06-08 09:46:57Z tp $ */
/**
* BMW interface. BMW (aka "Blue Midnight Wish") is a family of
* functions which differ by their output size; this implementation
* defines BMW for output sizes 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_bmw.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef BMW_HASH_H__
#define BMW_HASH_H__
#ifdef __cplusplus
extern "C"{
#endif
#include <stddef.h>
#ifdef __AVX2__
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
#define SPH_SIZE_bmw256 256
#define SPH_SIZE_bmw512 512
typedef struct {
__m128i buf[64];
__m128i H[16];
size_t ptr;
sph_u32 bit_count; // assume bit_count fits in 32 bits
} bmw_4way_small_context;
typedef bmw_4way_small_context bmw256_4way_context;
typedef struct {
__m256i buf[16];
__m256i H[16];
size_t ptr;
sph_u64 bit_count;
} bmw_4way_big_context;
typedef bmw_4way_big_context bmw512_4way_context;
void bmw256_4way_init(void *cc);
void bmw256_4way(void *cc, const void *data, size_t len);
void bmw256_4way_close(void *cc, void *dst);
void bmw256_4way_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
void bmw512_4way_init(void *cc);
void bmw512_4way(void *cc, const void *data, size_t len);
void bmw512_4way_close(void *cc, void *dst);
void bmw512_4way_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#endif
#ifdef __cplusplus
}
#endif
#endif

1251
algo/bmw/bmw.test Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>

View File

@@ -41,7 +41,7 @@ extern "C"{
#endif
#include <stddef.h>
#include "algo/sha3/sph_types.h"
#include "algo/sha/sph_types.h"
/**
* Output size (in bits) for BMW-224.

View File

@@ -477,7 +477,7 @@ do { \
for (u = 0; u < 16; u ++) \
sph_enc64le_aligned(data + 8 * u, h2[u]); \
dh = h1; \
h = final_b; \
h = (sph_u64*)final_b; \
} \
/* end wrapped for break loop */ \
out = dst; \

View File

@@ -2,7 +2,6 @@
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include "miner.h"
#include "algo-gate-api.h"
#if defined(__arm__) || defined(_MSC_VER)

View File

@@ -3,7 +3,8 @@
#include "cryptonight.h"
#include "miner.h"
#include "crypto/c_keccak.h"
#include "avxdefs.h"
#include <immintrin.h>
//#include "avxdefs.h"
void aesni_parallel_noxor(uint8_t *long_state, uint8_t *text, uint8_t *ExpandedKey);
void aesni_parallel_xor(uint8_t *text, uint8_t *ExpandedKey, uint8_t *long_state);
@@ -109,43 +110,43 @@ static __thread cryptonight_ctx ctx;
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
{
#ifndef NO_AES_NI
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
uint8_t ExpandedKey[256] __attribute__((aligned(64)));
__m128i *longoutput, *expkey, *xmminput;
size_t i, j;
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
memcpy( ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE );
ExpandAESKey256( ExpandedKey );
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
__m128i *longoutput, *expkey, *xmminput;
longoutput = (__m128i *)ctx.long_state;
expkey = (__m128i *)ExpandedKey;
xmminput = (__m128i *)ctx.text;
longoutput = (__m128i*)ctx.long_state;
xmminput = (__m128i*)ctx.text;
expkey = (__m128i*)ExpandedKey;
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
// aesni_parallel_noxor(&ctx->long_state[i], ctx->text, ExpandedKey);
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
// prefetch expkey, xmminput and enough longoutput for 4 iterations
_mm_prefetch( xmminput, _MM_HINT_T0 );
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
for ( i = 0; i < 64; i += 16 )
{
_mm_prefetch( longoutput + i, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 4, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 8, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 12, _MM_HINT_T0 );
}
_mm_prefetch( expkey, _MM_HINT_T0 );
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
for ( i = 0; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
for ( i = 0; i < 64; i += 16 )
{
// prefetch 4 loops ahead,
__builtin_prefetch( longoutput + i, 1, 0 );
__builtin_prefetch( longoutput + i + 4, 1, 0 );
__builtin_prefetch( longoutput + i + 8, 1, 0 );
__builtin_prefetch( longoutput + i + 12, 1, 0 );
}
// n-4 iterations
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
i += INIT_SIZE_M128I )
{
// prefetch 4 iterations ahead.
__builtin_prefetch( longoutput + i + 64, 1, 0 );
__builtin_prefetch( longoutput + i + 68, 1, 0 );
for (j = 0; j < 10; j++ )
for ( j = 0; j < 10; j++ )
{
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
@@ -165,84 +166,99 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
}
// last 4 iterations
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
{
for ( j = 0; j < 10; j++ )
{
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
}
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
}
// cast_m128i( ctx.a ) = _mm_xor_si128( casti_m128i( ctx.state.k, 0 ) ,
// casti_m128i( ctx.state.k, 2 ) );
// cast_m128i( ctx.b ) = _mm_xor_si128( casti_m128i( ctx.state.k, 1 ),
// casti_m128i( ctx.state.k, 3 ) );
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
// for (i = 0; i < 2; i++)
// {
// ctx.a[i] = ((uint64_t *)ctx.state.k)[i] ^ ((uint64_t *)ctx.state.k)[i+4];
// ctx.b[i] = ((uint64_t *)ctx.state.k)[i+2] ^ ((uint64_t *)ctx.state.k)[i+6];
// }
__m128i b_x = _mm_load_si128((__m128i *)ctx.b);
uint64_t a[2] __attribute((aligned(16))), b[2] __attribute((aligned(16)));
uint64_t a[2] __attribute((aligned(16))),
b[2] __attribute((aligned(16))),
c[2] __attribute((aligned(16)));
a[0] = ctx.a[0];
a[1] = ctx.a[1];
for(i = 0; __builtin_expect(i < 0x80000, 1); i++)
__m128i b_x = _mm_load_si128( (__m128i*)ctx.b );
__m128i a_x = _mm_load_si128( (__m128i*)a );
__m128i* lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
__m128i c_x = _mm_load_si128( lsa );
uint64_t *nextblock;
uint64_t hi, lo;
// n-1 iterations
for( i = 0; __builtin_expect( i < 0x7ffff, 1 ); i++ )
{
uint64_t c[2];
__builtin_prefetch( &ctx.long_state[c[0] & 0x1FFFF0], 0, 1 );
__m128i c_x = _mm_load_si128(
(__m128i *)&ctx.long_state[a[0] & 0x1FFFF0]);
__m128i a_x = _mm_load_si128((__m128i *)a);
c_x = _mm_aesenc_si128(c_x, a_x);
_mm_store_si128((__m128i *)c, c_x);
b_x = _mm_xor_si128(b_x, c_x);
_mm_store_si128((__m128i *)&ctx.long_state[a[0] & 0x1FFFF0], b_x);
uint64_t *nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
// uint64_t b[2];
c_x = _mm_aesenc_si128( c_x, a_x );
_mm_store_si128( (__m128i*)c, c_x );
b_x = _mm_xor_si128( b_x, c_x );
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
_mm_store_si128( lsa, b_x );
b[0] = nextblock[0];
b[1] = nextblock[1];
{
uint64_t hi, lo;
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
__asm__( "mulq %3\n\t"
: "=d" ( hi ),
"=a" ( lo )
: "%a" ( c[0] ),
"rm" ( b[0] )
: "cc" );
__asm__("mulq %3\n\t"
: "=d" (hi),
"=a" (lo)
: "%a" (c[0]),
"rm" (b[0])
: "cc" );
a[0] += hi;
a[1] += lo;
}
uint64_t *dst = (uint64_t*)&ctx.long_state[c[0] & 0x1FFFF0];
// __m128i *dst = (__m128i*)&ctx.long_state[c[0] & 0x1FFFF0];
// *dst = cast_m128i( a );
dst[0] = a[0];
dst[1] = a[1];
// cast_m128i( a ) = _mm_xor_si128( cast_m128i( a ), cast_m128i( b ) );
a[0] ^= b[0];
a[1] ^= b[1];
b_x = c_x;
__builtin_prefetch( &ctx.long_state[a[0] & 0x1FFFF0], 0, 3 );
b_x = c_x;
nextblock[0] = a[0] + hi;
nextblock[1] = a[1] + lo;
a[0] = b[0] ^ nextblock[0];
a[1] = b[1] ^ nextblock[1];
lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
a_x = _mm_load_si128( (__m128i*)a );
c_x = _mm_load_si128( lsa );
}
// abreviated nth iteration
c_x = _mm_aesenc_si128( c_x, a_x );
_mm_store_si128( (__m128i*)c, c_x );
b_x = _mm_xor_si128( b_x, c_x );
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
_mm_store_si128( lsa, b_x );
b[0] = nextblock[0];
b[1] = nextblock[1];
__asm__( "mulq %3\n\t"
: "=d" ( hi ),
"=a" ( lo )
: "%a" ( c[0] ),
"rm" ( b[0] )
: "cc" );
nextblock[0] = a[0] + hi;
nextblock[1] = a[1] + lo;
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE );
ExpandAESKey256( ExpandedKey );
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
// aesni_parallel_xor(&ctx->text, ExpandedKey, &ctx->long_state[i]);
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
_mm_prefetch( xmminput, _MM_HINT_T0 );
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
for ( i = 0; i < 64; i += 16 )
@@ -256,9 +272,11 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
for ( i = 0; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
// n-4 iterations
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
i += INIT_SIZE_M128I )
{
// stay 4 loops ahead,
// stay 4 iterations ahead.
_mm_prefetch( longoutput + i + 64, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 68, _MM_HINT_T0 );
@@ -283,10 +301,34 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
}
}
// last 4 iterations
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
{
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
for( j = 0; j < 10; j++ )
{
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
}
}
memcpy( ctx.state.init, ctx.text, INIT_SIZE_BYTE);
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
#endif
}

View File

@@ -5,7 +5,6 @@
// Modified for CPUminer by Lucas Jones
#include "cpuminer-config.h"
//#include "miner.h"
#include "algo-gate-api.h"
#ifndef NO_AES_NI

View File

@@ -42,7 +42,7 @@ extern "C"{
#endif
#include <stddef.h>
#include "algo/sha3/sph_types.h"
#include "algo/sha/sph_types.h"
/**
* Output size (in bits) for CubeHash-224.

View File

@@ -9,11 +9,11 @@
#include <immintrin.h>
#endif
#include "cubehash_sse2.h"
#include "algo/sha3/sha3-defs.h"
//enum { SUCCESS = 0, FAIL = 1, BAD_HASHBITLEN = 2 };
//#if defined(OPTIMIZE_SSE2)
#include "algo/sha/sha3-defs.h"
#include <stdbool.h>
#include <unistd.h>
#include <memory.h>
#include "avxdefs.h"
static void transform( cubehashParam *sp )
{
@@ -129,6 +129,18 @@ static void transform( cubehashParam *sp )
#endif
} // transform
// Cubehash context initializing is very expensive.
// Cache the intial value for faster reinitializing.
cubehashParam cube_ctx_cache __attribute__ ((aligned (64)));
int cubehashReinit( cubehashParam *sp )
{
memcpy( sp, &cube_ctx_cache, sizeof(cubehashParam) );
return SUCCESS;
}
// Initialize the cache then copy to sp.
int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes)
{
int i;
@@ -139,76 +151,77 @@ int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes)
/* Sanity checks */
if ( rounds <= 0 || rounds > 32 )
rounds = CUBEHASH_ROUNDS;
rounds = CUBEHASH_ROUNDS;
if ( blockbytes <= 0 || blockbytes >= 256)
blockbytes = CUBEHASH_BLOCKBYTES;
blockbytes = CUBEHASH_BLOCKBYTES;
// all sizes of __m128i
cube_ctx_cache.hashlen = hashbitlen/128;
cube_ctx_cache.blocksize = blockbytes/16;
cube_ctx_cache.rounds = rounds;
cube_ctx_cache.pos = 0;
sp->hashbitlen = hashbitlen;
sp->rounds = rounds;
sp->blockbytes = blockbytes;
for ( i = 0; i < 8; ++i )
sp->x[i] = _mm_set_epi32(0, 0, 0, 0);
sp->x[0] = _mm_set_epi32(0, sp->rounds, sp->blockbytes, hashbitlen / 8);
for ( i = 0; i < 10; ++i )
transform(sp);
sp->pos = 0;
return SUCCESS;
}
cube_ctx_cache.x[i] = _mm_setzero_si128();;
int
cubehashReset(cubehashParam *sp)
{
return cubehashInit(sp, sp->hashbitlen, sp->rounds, sp->blockbytes);
cube_ctx_cache.x[0] = _mm_set_epi32( 0, rounds, blockbytes,
hashbitlen / 8 );
for ( i = 0; i < 10; ++i )
transform( &cube_ctx_cache );
memcpy( sp, &cube_ctx_cache, sizeof(cubehashParam) );
return SUCCESS;
}
int cubehashUpdate( cubehashParam *sp, const byte *data, size_t size )
{
uint64_t databitlen = 8 * size;
const int len = size / 16;
const __m128i* in = (__m128i*)data;
int i;
/* caller promises us that previous data had integral number of bytes */
/* so sp->pos is a multiple of 8 */
// It is assumed data is aligned to 256 bits and is a multiple of 128 bits.
// Current usage sata is either 64 or 80 bytes.
while ( databitlen >= 8 )
for ( i = 0; i < len; i++ )
{
( (unsigned char *)sp->x )[sp->pos/8] ^= *data;
data += 1;
databitlen -= 8;
sp->pos += 8;
if ( sp->pos == 8 * sp->blockbytes )
sp->x[ sp->pos ] = _mm_xor_si128( sp->x[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform( sp );
sp->pos = 0;
}
}
if ( databitlen > 0 )
{
( (unsigned char *)sp->x )[sp->pos/8] ^= *data;
sp->pos += databitlen;
transform( sp );
sp->pos = 0;
}
}
return SUCCESS;
}
int cubehashDigest( cubehashParam *sp, byte *digest )
{
__m128i* hash = (__m128i*)digest;
int i;
( (unsigned char *)sp->x )[sp->pos/8] ^= ( 128 >> (sp->pos % 8) );
transform(sp);
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->x[ sp->pos ] = _mm_xor_si128( sp->x[ sp->pos ],
_mm_set_epi8( 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 ) );
transform( sp );
sp->x[7] = _mm_xor_si128(sp->x[7], _mm_set_epi32(1, 0, 0, 0));
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
sp->x[7] = _mm_xor_si128( sp->x[7], _mm_set_epi32( 1,0,0,0 ) );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
for ( i = 0; i < sp->hashbitlen / 8; ++i )
digest[i] = ((unsigned char *) sp->x)[i];
for ( i = 0; i < sp->hashlen; i++ )
hash[i] = sp->x[i];
return SUCCESS;
}
@@ -216,48 +229,45 @@ int cubehashDigest( cubehashParam *sp, byte *digest )
int cubehashUpdateDigest( cubehashParam *sp, byte *digest,
const byte *data, size_t size )
{
uint64_t databitlen = 8 * size;
int hashlen128 = sp->hashbitlen/128;
const int len = size / 16;
const __m128i* in = (__m128i*)data;
__m128i* hash = (__m128i*)digest;
int i;
/* caller promises us that previous data had integral number of bytes */
/* so sp->pos is a multiple of 8 */
// It is assumed data is aligned to 256 bits and is a multiple of 128 bits.
// Current usage sata is either 64 or 80 bytes.
while ( databitlen >= 8 )
for ( i = 0; i < len; i++ )
{
( (unsigned char *)sp->x )[sp->pos/8] ^= *data;
data += 1;
databitlen -= 8;
sp->pos += 8;
if ( sp->pos == 8 * sp->blockbytes )
sp->x[ sp->pos ] = _mm_xor_si128( sp->x[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform(sp);
sp->pos = 0;
transform( sp );
sp->pos = 0;
}
}
if ( databitlen > 0 )
{
( (unsigned char *)sp->x )[sp->pos/8] ^= *data;
sp->pos += databitlen;
}
( (unsigned char *)sp->x )[sp->pos/8] ^= ( 128 >> (sp->pos % 8) );
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->x[ sp->pos ] = _mm_xor_si128( sp->x[ sp->pos ],
_mm_set_epi8( 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 ) );
transform( sp );
sp->x[7] = _mm_xor_si128( sp->x[7], _mm_set_epi32(1,0,0,0) );
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
sp->x[7] = _mm_xor_si128( sp->x[7], _mm_set_epi32( 1,0,0,0 ) );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
for ( i = 0; i < hashlen128; i++ )
( (__m128i*)digest )[i] = ( (__m128i*)sp->x )[i];
for ( i = 0; i < sp->hashlen; i++ )
hash[i] = sp->x[i];
return SUCCESS;
}

View File

@@ -3,58 +3,37 @@
#include "compat.h"
#include <stdint.h>
#include "algo/sha3/sha3-defs.h"
//#include <beecrypt/beecrypt.h>
#include "algo/sha/sha3-defs.h"
//#if defined(__SSE2__)
#define OPTIMIZE_SSE2
//#endif
#if defined(OPTIMIZE_SSE2)
#include <emmintrin.h>
#endif
/*!\brief Holds all the parameters necessary for the CUBEHASH algorithm.
* \ingroup HASH_cubehash_m
*/
struct _cubehashParam
//#endif
{
int hashbitlen;
int hashlen; // __m128i
int rounds;
int blockbytes;
int pos; /* number of bits read into x from current block */
#if defined(OPTIMIZE_SSE2)
__m128i _ALIGN(256) x[8];
#else
uint32_t x[32];
#endif
int blocksize; // __m128i
int pos; // number of __m128i read into x from current block
__m128i _ALIGN(256) x[8]; // aligned for __m256i
};
//#ifndef __cplusplus
typedef struct _cubehashParam cubehashParam;
//#endif
#ifdef __cplusplus
extern "C" {
#endif
/*!\var cubehash256
* \brief Holds the full API description of the CUBEHASH algorithm.
*/
//extern BEECRYPTAPI const hashFunction cubehash256;
//BEECRYPTAPI
int cubehashInit(cubehashParam* sp, int hashbitlen, int rounds, int blockbytes);
// reinitialize context with same parameters, much faster.
int cubehashReinit( cubehashParam* sp );
//BEECRYPTAPI
int cubehashReset(cubehashParam* sp);
//BEECRYPTAPI
int cubehashUpdate(cubehashParam* sp, const byte *data, size_t size);
//BEECRYPTAPI
int cubehashDigest(cubehashParam* sp, byte *digest);
int cubehashUpdateDigest( cubehashParam *sp, byte *digest, const byte *data,

View File

@@ -1,2 +0,0 @@
amd64
x86

View File

@@ -14,18 +14,20 @@
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
*
*/
#if defined(__AES__)
#include <memory.h>
#include "miner.h"
#include "hash_api.h"
#include "vperm.h"
//#include "vperm.h"
#include <immintrin.h>
/*
#ifndef NO_AES_NI
#include <wmmintrin.h>
#else
#include <tmmintrin.h>
#endif
*/
MYALIGN const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F};
MYALIGN const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC};
@@ -246,7 +248,8 @@ void DumpState(__m128i *ps)
void Compress(hashState_echo *ctx, const unsigned char *pmsg, unsigned int uBlockCount)
{
unsigned int r, b, i, j;
__m128i t1, t2, t3, t4, s1, s2, s3, k1, ktemp;
// __m128i t1, t2, t3, t4, s1, s2, s3, k1, ktemp;
__m128i t1, t2, s2, k1;
__m128i _state[4][4], _state2[4][4], _statebackup[4][4];
@@ -396,7 +399,7 @@ HashReturn init_echo(hashState_echo *ctx, int nHashSize)
{
int i, j;
ctx->k = _mm_xor_si128(ctx->k, ctx->k);
ctx->k = _mm_setzero_si128();
ctx->processed_bits = 0;
ctx->uBufferBytes = 0;
@@ -742,4 +745,4 @@ HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databit
return SUCCESS;
}
#endif

View File

@@ -22,7 +22,7 @@
#endif
#include "algo/sha3/sha3_common.h"
#include "algo/sha/sha3_common.h"
#include <emmintrin.h>

View File

@@ -1 +0,0 @@
Çağdaş Çalık

View File

@@ -1,119 +0,0 @@
/*
* file : vperm.h
* version : 1.0.208
* date : 14.12.2010
*
* vperm implementation of AES s-box
*
* Credits: Adapted from Mike Hamburg's AES implementation, http://crypto.stanford.edu/vpaes/
*
* Cagdas Calik
* ccalik@metu.edu.tr
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
*
*/
#ifndef VPERM_H
#define VPERM_H
#include "algo/sha3/sha3_common.h"
#include <tmmintrin.h>
/*
extern const unsigned int _k_s0F[];
extern const unsigned int _k_ipt[];
extern const unsigned int _k_opt[];
extern const unsigned int _k_inv[];
extern const unsigned int _k_sb1[];
extern const unsigned int _k_sb2[];
extern const unsigned int _k_sb3[];
extern const unsigned int _k_sb4[];
extern const unsigned int _k_sb5[];
extern const unsigned int _k_sb7[];
extern const unsigned int _k_sbo[];
extern const unsigned int _k_h63[];
extern const unsigned int _k_hc6[];
extern const unsigned int _k_h5b[];
extern const unsigned int _k_h4e[];
extern const unsigned int _k_h0e[];
extern const unsigned int _k_h15[];
extern const unsigned int _k_aesmix1[];
extern const unsigned int _k_aesmix2[];
extern const unsigned int _k_aesmix3[];
extern const unsigned int _k_aesmix4[];
*/
// input: x, table
// output: x
#define TRANSFORM(x, table, t1, t2)\
t1 = _mm_andnot_si128(M128(_k_s0F), x);\
t1 = _mm_srli_epi32(t1, 4);\
x = _mm_and_si128(x, M128(_k_s0F));\
t1 = _mm_shuffle_epi8(*((__m128i*)table + 1), t1);\
x = _mm_shuffle_epi8(*((__m128i*)table + 0), x);\
x = _mm_xor_si128(x, t1)
// compiled erroneously with 32-bit msc compiler
//t2 = _mm_shuffle_epi8(table[0], x);\
//x = _mm_shuffle_epi8(table[1], t1);\
//x = _mm_xor_si128(x, t2)
// input: x
// output: t2, t3
#define SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4)\
t1 = _mm_andnot_si128(M128(_k_s0F), x);\
t1 = _mm_srli_epi32(t1, 4);\
x = _mm_and_si128(x, M128(_k_s0F));\
t2 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 1), x);\
x = _mm_xor_si128(x, t1);\
t3 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t1);\
t3 = _mm_xor_si128(t3, t2);\
t4 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), x);\
t4 = _mm_xor_si128(t4, t2);\
t2 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t3);\
t2 = _mm_xor_si128(t2, x);\
t3 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t4);\
t3 = _mm_xor_si128(t3, t1);\
// input: x1, x2, table
// output: y
#define VPERM_LOOKUP(x1, x2, table, y, t)\
t = _mm_shuffle_epi8(*((__m128i*)table + 0), x1);\
y = _mm_shuffle_epi8(*((__m128i*)table + 1), x2);\
y = _mm_xor_si128(y, t)
// input: x
// output: x
#define SUBSTITUTE_VPERM(x, t1, t2, t3, t4) \
TRANSFORM(x, _k_ipt, t1, t2);\
SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4);\
VPERM_LOOKUP(t2, t3, _k_sbo, x, t1);\
x = _mm_xor_si128(x, M128(_k_h63))
// input: x
// output: x
#define AES_ROUND_VPERM_CORE(x, t1, t2, t3, t4, s1, s2, s3) \
SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4);\
VPERM_LOOKUP(t2, t3, _k_sb1, s1, t1);\
VPERM_LOOKUP(t2, t3, _k_sb2, s2, t1);\
s3 = _mm_xor_si128(s1, s2);\
x = _mm_shuffle_epi8(s2, M128(_k_aesmix1));\
x = _mm_xor_si128(x, _mm_shuffle_epi8(s3, M128(_k_aesmix2)));\
x = _mm_xor_si128(x, _mm_shuffle_epi8(s1, M128(_k_aesmix3)));\
x = _mm_xor_si128(x, _mm_shuffle_epi8(s1, M128(_k_aesmix4)));\
x = _mm_xor_si128(x, M128(_k_h5b))
// input: x
// output: x
#define AES_ROUND_VPERM(x, t1, t2, t3, t4, s1, s2, s3) \
TRANSFORM(x, _k_ipt, t1, t2);\
AES_ROUND_VPERM_CORE(x, t1, t2, t3, t4, s1, s2, s3);\
TRANSFORM(x, _k_opt, t1, t2)
#endif // VPERM_H

View File

@@ -71,7 +71,7 @@ extern "C"{
#endif
#define AES_BIG_ENDIAN 0
#include "algo/sha3/aes_helper.c"
#include "algo/sha/aes_helper.c"
#if SPH_ECHO_64

View File

@@ -41,7 +41,7 @@ extern "C"{
#endif
#include <stddef.h>
#include "algo/sha3/sph_types.h"
#include "algo/sha/sph_types.h"
/**
* Output size (in bits) for ECHO-224.

View File

@@ -41,7 +41,7 @@ extern "C"{
#endif
#include <stddef.h>
#include "algo/sha3/sph_types.h"
#include "algo/sha/sph_types.h"
/**
* Output size (in bits) for ECHO-224.

View File

@@ -2,7 +2,7 @@
#define SPH_FUGUE_H__
#include <stddef.h>
#include "algo/sha3/sph_types.h"
#include "algo/sha/sph_types.h"
#ifdef __cplusplus
extern "C"{

View File

@@ -41,7 +41,7 @@ extern "C"{
#endif
#include <stddef.h>
#include "algo/sha3/sph_types.h"
#include "algo/sha/sph_types.h"
/**
* Output size (in bits) for GOST-256.

View File

@@ -13,8 +13,8 @@
/* global constants */
__m128i ROUND_CONST_Lx;
__m128i ROUND_CONST_L0[ROUNDS512];
__m128i ROUND_CONST_L7[ROUNDS512];
//__m128i ROUND_CONST_L0[ROUNDS512];
//__m128i ROUND_CONST_L7[ROUNDS512];
__m128i ROUND_CONST_P[ROUNDS1024];
__m128i ROUND_CONST_Q[ROUNDS1024];
__m128i TRANSP_MASK;
@@ -22,11 +22,9 @@ __m128i SUBSH_MASK[8];
__m128i ALL_1B;
__m128i ALL_FF;
#define tos(a) #a
#define tostr(a) tos(a)
/* xmm[i] will be multiplied by 2
* xmm[j] will be lost
* xmm[k] has to be all 0x1b */
@@ -153,352 +151,6 @@ __m128i ALL_FF;
b1 = _mm_xor_si128(b1, a4);\
}/*MixBytes*/
#if (LENGTH <= 256)
#define SET_CONSTANTS(){\
ALL_1B = _mm_set_epi32(0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b);\
TRANSP_MASK = _mm_set_epi32(0x0f070b03, 0x0e060a02, 0x0d050901, 0x0c040800);\
SUBSH_MASK[0] = _mm_set_epi32(0x03060a0d, 0x08020509, 0x0c0f0104, 0x070b0e00);\
SUBSH_MASK[1] = _mm_set_epi32(0x04070c0f, 0x0a03060b, 0x0e090205, 0x000d0801);\
SUBSH_MASK[2] = _mm_set_epi32(0x05000e09, 0x0c04070d, 0x080b0306, 0x010f0a02);\
SUBSH_MASK[3] = _mm_set_epi32(0x0601080b, 0x0e05000f, 0x0a0d0407, 0x02090c03);\
SUBSH_MASK[4] = _mm_set_epi32(0x0702090c, 0x0f060108, 0x0b0e0500, 0x030a0d04);\
SUBSH_MASK[5] = _mm_set_epi32(0x00030b0e, 0x0907020a, 0x0d080601, 0x040c0f05);\
SUBSH_MASK[6] = _mm_set_epi32(0x01040d08, 0x0b00030c, 0x0f0a0702, 0x050e0906);\
SUBSH_MASK[7] = _mm_set_epi32(0x02050f0a, 0x0d01040e, 0x090c0003, 0x06080b07);\
for(i = 0; i < ROUNDS512; i++)\
{\
ROUND_CONST_L0[i] = _mm_set_epi32(0xffffffff, 0xffffffff, 0x70605040 ^ (i * 0x01010101), 0x30201000 ^ (i * 0x01010101));\
ROUND_CONST_L7[i] = _mm_set_epi32(0x8f9fafbf ^ (i * 0x01010101), 0xcfdfefff ^ (i * 0x01010101), 0x00000000, 0x00000000);\
}\
ROUND_CONST_Lx = _mm_set_epi32(0xffffffff, 0xffffffff, 0x00000000, 0x00000000);\
}while(0); \
/* one round
* i = round number
* a0-a7 = input rows
* b0-b7 = output rows
*/
#define ROUND(i, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
/* AddRoundConstant */\
b1 = ROUND_CONST_Lx;\
a0 = _mm_xor_si128(a0, (ROUND_CONST_L0[i]));\
a1 = _mm_xor_si128(a1, b1);\
a2 = _mm_xor_si128(a2, b1);\
a3 = _mm_xor_si128(a3, b1);\
a4 = _mm_xor_si128(a4, b1);\
a5 = _mm_xor_si128(a5, b1);\
a6 = _mm_xor_si128(a6, b1);\
a7 = _mm_xor_si128(a7, (ROUND_CONST_L7[i]));\
\
/* ShiftBytes + SubBytes (interleaved) */\
b0 = _mm_xor_si128(b0, b0);\
a0 = _mm_shuffle_epi8(a0, (SUBSH_MASK[0]));\
a0 = _mm_aesenclast_si128(a0, b0);\
a1 = _mm_shuffle_epi8(a1, (SUBSH_MASK[1]));\
a1 = _mm_aesenclast_si128(a1, b0);\
a2 = _mm_shuffle_epi8(a2, (SUBSH_MASK[2]));\
a2 = _mm_aesenclast_si128(a2, b0);\
a3 = _mm_shuffle_epi8(a3, (SUBSH_MASK[3]));\
a3 = _mm_aesenclast_si128(a3, b0);\
a4 = _mm_shuffle_epi8(a4, (SUBSH_MASK[4]));\
a4 = _mm_aesenclast_si128(a4, b0);\
a5 = _mm_shuffle_epi8(a5, (SUBSH_MASK[5]));\
a5 = _mm_aesenclast_si128(a5, b0);\
a6 = _mm_shuffle_epi8(a6, (SUBSH_MASK[6]));\
a6 = _mm_aesenclast_si128(a6, b0);\
a7 = _mm_shuffle_epi8(a7, (SUBSH_MASK[7]));\
a7 = _mm_aesenclast_si128(a7, b0);\
\
/* MixBytes */\
MixBytes(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7);\
\
}
/* 10 rounds, P and Q in parallel */
#define ROUNDS_P_Q(){\
ROUND(0, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
ROUND(1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
ROUND(2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
ROUND(3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
ROUND(4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
ROUND(5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
ROUND(6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
ROUND(7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
ROUND(8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
ROUND(9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
}
/* Matrix Transpose Step 1
* input is a 512-bit state with two columns in one xmm
* output is a 512-bit state with two rows in one xmm
* inputs: i0-i3
* outputs: i0, o1-o3
* clobbers: t0
*/
#define Matrix_Transpose_A(i0, i1, i2, i3, o1, o2, o3, t0){\
t0 = TRANSP_MASK;\
\
i0 = _mm_shuffle_epi8(i0, t0);\
i1 = _mm_shuffle_epi8(i1, t0);\
i2 = _mm_shuffle_epi8(i2, t0);\
i3 = _mm_shuffle_epi8(i3, t0);\
\
o1 = i0;\
t0 = i2;\
\
i0 = _mm_unpacklo_epi16(i0, i1);\
o1 = _mm_unpackhi_epi16(o1, i1);\
i2 = _mm_unpacklo_epi16(i2, i3);\
t0 = _mm_unpackhi_epi16(t0, i3);\
\
i0 = _mm_shuffle_epi32(i0, 216);\
o1 = _mm_shuffle_epi32(o1, 216);\
i2 = _mm_shuffle_epi32(i2, 216);\
t0 = _mm_shuffle_epi32(t0, 216);\
\
o2 = i0;\
o3 = o1;\
\
i0 = _mm_unpacklo_epi32(i0, i2);\
o1 = _mm_unpacklo_epi32(o1, t0);\
o2 = _mm_unpackhi_epi32(o2, i2);\
o3 = _mm_unpackhi_epi32(o3, t0);\
}/**/
/* Matrix Transpose Step 2
* input are two 512-bit states with two rows in one xmm
* output are two 512-bit states with one row of each state in one xmm
* inputs: i0-i3 = P, i4-i7 = Q
* outputs: (i0, o1-o7) = (P|Q)
* possible reassignments: (output reg = input reg)
* * i1 -> o3-7
* * i2 -> o5-7
* * i3 -> o7
* * i4 -> o3-7
* * i5 -> o6-7
*/
#define Matrix_Transpose_B(i0, i1, i2, i3, i4, i5, i6, i7, o1, o2, o3, o4, o5, o6, o7){\
o1 = i0;\
o2 = i1;\
i0 = _mm_unpacklo_epi64(i0, i4);\
o1 = _mm_unpackhi_epi64(o1, i4);\
o3 = i1;\
o4 = i2;\
o2 = _mm_unpacklo_epi64(o2, i5);\
o3 = _mm_unpackhi_epi64(o3, i5);\
o5 = i2;\
o6 = i3;\
o4 = _mm_unpacklo_epi64(o4, i6);\
o5 = _mm_unpackhi_epi64(o5, i6);\
o7 = i3;\
o6 = _mm_unpacklo_epi64(o6, i7);\
o7 = _mm_unpackhi_epi64(o7, i7);\
}/**/
/* Matrix Transpose Inverse Step 2
* input are two 512-bit states with one row of each state in one xmm
* output are two 512-bit states with two rows in one xmm
* inputs: i0-i7 = (P|Q)
* outputs: (i0, i2, i4, i6) = P, (o0-o3) = Q
*/
#define Matrix_Transpose_B_INV(i0, i1, i2, i3, i4, i5, i6, i7, o0, o1, o2, o3){\
o0 = i0;\
i0 = _mm_unpacklo_epi64(i0, i1);\
o0 = _mm_unpackhi_epi64(o0, i1);\
o1 = i2;\
i2 = _mm_unpacklo_epi64(i2, i3);\
o1 = _mm_unpackhi_epi64(o1, i3);\
o2 = i4;\
i4 = _mm_unpacklo_epi64(i4, i5);\
o2 = _mm_unpackhi_epi64(o2, i5);\
o3 = i6;\
i6 = _mm_unpacklo_epi64(i6, i7);\
o3 = _mm_unpackhi_epi64(o3, i7);\
}/**/
/* Matrix Transpose Output Step 2
* input is one 512-bit state with two rows in one xmm
* output is one 512-bit state with one row in the low 64-bits of one xmm
* inputs: i0,i2,i4,i6 = S
* outputs: (i0-7) = (0|S)
*/
#define Matrix_Transpose_O_B(i0, i1, i2, i3, i4, i5, i6, i7, t0){\
t0 = _mm_xor_si128(t0, t0);\
i1 = i0;\
i3 = i2;\
i5 = i4;\
i7 = i6;\
i0 = _mm_unpacklo_epi64(i0, t0);\
i1 = _mm_unpackhi_epi64(i1, t0);\
i2 = _mm_unpacklo_epi64(i2, t0);\
i3 = _mm_unpackhi_epi64(i3, t0);\
i4 = _mm_unpacklo_epi64(i4, t0);\
i5 = _mm_unpackhi_epi64(i5, t0);\
i6 = _mm_unpacklo_epi64(i6, t0);\
i7 = _mm_unpackhi_epi64(i7, t0);\
}/**/
/* Matrix Transpose Output Inverse Step 2
* input is one 512-bit state with one row in the low 64-bits of one xmm
* output is one 512-bit state with two rows in one xmm
* inputs: i0-i7 = (0|S)
* outputs: (i0, i2, i4, i6) = S
*/
#define Matrix_Transpose_O_B_INV(i0, i1, i2, i3, i4, i5, i6, i7){\
i0 = _mm_unpacklo_epi64(i0, i1);\
i2 = _mm_unpacklo_epi64(i2, i3);\
i4 = _mm_unpacklo_epi64(i4, i5);\
i6 = _mm_unpacklo_epi64(i6, i7);\
endif\
}/**/
void INIT(u64* h)
{
__m128i* const chaining = (__m128i*) h;
static __m128i xmm0, /*xmm1,*/ xmm2, /*xmm3, xmm4, xmm5,*/ xmm6, xmm7;
static __m128i /*xmm8, xmm9, xmm10, xmm11,*/ xmm12, xmm13, xmm14, xmm15;
/* load IV into registers xmm12 - xmm15 */
xmm12 = chaining[0];
xmm13 = chaining[1];
xmm14 = chaining[2];
xmm15 = chaining[3];
/* transform chaining value from column ordering into row ordering */
/* we put two rows (64 bit) of the IV into one 128-bit XMM register */
Matrix_Transpose_A(xmm12, xmm13, xmm14, xmm15, xmm2, xmm6, xmm7, xmm0);
/* store transposed IV */
chaining[0] = xmm12;
chaining[1] = xmm2;
chaining[2] = xmm6;
chaining[3] = xmm7;
}
void TF512(u64* h, u64* m)
{
__m128i* const chaining = (__m128i*) h;
__m128i* const message = (__m128i*) m;
static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
static __m128i TEMP0;
static __m128i TEMP1;
static __m128i TEMP2;
#ifdef IACA_TRACE
IACA_START;
#endif
/* load message into registers xmm12 - xmm15 */
xmm12 = message[0];
xmm13 = message[1];
xmm14 = message[2];
xmm15 = message[3];
/* transform message M from column ordering into row ordering */
/* we first put two rows (64 bit) of the message into one 128-bit xmm register */
Matrix_Transpose_A(xmm12, xmm13, xmm14, xmm15, xmm2, xmm6, xmm7, xmm0);
/* load previous chaining value */
/* we first put two rows (64 bit) of the CV into one 128-bit xmm register */
xmm8 = chaining[0];
xmm0 = chaining[1];
xmm4 = chaining[2];
xmm5 = chaining[3];
/* xor message to CV get input of P */
/* result: CV+M in xmm8, xmm0, xmm4, xmm5 */
xmm8 = _mm_xor_si128(xmm8, xmm12);
xmm0 = _mm_xor_si128(xmm0, xmm2);
xmm4 = _mm_xor_si128(xmm4, xmm6);
xmm5 = _mm_xor_si128(xmm5, xmm7);
/* there are now 2 rows of the Groestl state (P and Q) in each xmm register */
/* unpack to get 1 row of P (64 bit) and Q (64 bit) into one xmm register */
/* result: the 8 rows of P and Q in xmm8 - xmm12 */
Matrix_Transpose_B(xmm8, xmm0, xmm4, xmm5, xmm12, xmm2, xmm6, xmm7, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);
/* compute the two permutations P and Q in parallel */
ROUNDS_P_Q();
/* unpack again to get two rows of P or two rows of Q in one xmm register */
Matrix_Transpose_B_INV(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3);
/* xor output of P and Q */
/* result: P(CV+M)+Q(M) in xmm0...xmm3 */
xmm0 = _mm_xor_si128(xmm0, xmm8);
xmm1 = _mm_xor_si128(xmm1, xmm10);
xmm2 = _mm_xor_si128(xmm2, xmm12);
xmm3 = _mm_xor_si128(xmm3, xmm14);
/* xor CV (feed-forward) */
/* result: P(CV+M)+Q(M)+CV in xmm0...xmm3 */
xmm0 = _mm_xor_si128(xmm0, (chaining[0]));
xmm1 = _mm_xor_si128(xmm1, (chaining[1]));
xmm2 = _mm_xor_si128(xmm2, (chaining[2]));
xmm3 = _mm_xor_si128(xmm3, (chaining[3]));
/* store CV */
chaining[0] = xmm0;
chaining[1] = xmm1;
chaining[2] = xmm2;
chaining[3] = xmm3;
#ifdef IACA_TRACE
IACA_END;
#endif
return;
}
void OF512(u64* h)
{
__m128i* const chaining = (__m128i*) h;
static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
static __m128i TEMP0;
static __m128i TEMP1;
static __m128i TEMP2;
/* load CV into registers xmm8, xmm10, xmm12, xmm14 */
xmm8 = chaining[0];
xmm10 = chaining[1];
xmm12 = chaining[2];
xmm14 = chaining[3];
/* there are now 2 rows of the CV in one xmm register */
/* unpack to get 1 row of P (64 bit) into one half of an xmm register */
/* result: the 8 input rows of P in xmm8 - xmm15 */
Matrix_Transpose_O_B(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0);
/* compute the permutation P */
/* result: the output of P(CV) in xmm8 - xmm15 */
ROUNDS_P_Q();
/* unpack again to get two rows of P in one xmm register */
/* result: P(CV) in xmm8, xmm10, xmm12, xmm14 */
Matrix_Transpose_O_B_INV(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);
/* xor CV to P output (feed-forward) */
/* result: P(CV)+CV in xmm8, xmm10, xmm12, xmm14 */
xmm8 = _mm_xor_si128(xmm8, (chaining[0]));
xmm10 = _mm_xor_si128(xmm10, (chaining[1]));
xmm12 = _mm_xor_si128(xmm12, (chaining[2]));
xmm14 = _mm_xor_si128(xmm14, (chaining[3]));
/* transform state back from row ordering into column ordering */
/* result: final hash value in xmm9, xmm11 */
Matrix_Transpose_A(xmm8, xmm10, xmm12, xmm14, xmm4, xmm9, xmm11, xmm0);
/* we only need to return the truncated half of the state */
chaining[2] = xmm9;
chaining[3] = xmm11;
}
#endif
#if (LENGTH > 256)
#define SET_CONSTANTS(){\
ALL_FF = _mm_set_epi32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);\
@@ -768,9 +420,8 @@ void OF512(u64* h)
}/**/
void INIT(u64* h)
void INIT( __m128i* chaining )
{
__m128i* const chaining = (__m128i*) h;
static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
@@ -798,10 +449,8 @@ void INIT(u64* h)
chaining[7] = xmm15;
}
void TF1024(u64* h, u64* m)
void TF1024( __m128i* chaining, const __m128i* message )
{
__m128i* const chaining = (__m128i*) h;
__m128i* const message = (__m128i*) m;
static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
static __m128i QTEMP[8];
@@ -914,9 +563,8 @@ void TF1024(u64* h, u64* m)
return;
}
void OF1024(u64* h)
void OF1024( __m128i* chaining )
{
__m128i* const chaining = (__m128i*) h;
static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
static __m128i TEMP0;
@@ -961,5 +609,3 @@ void OF1024(u64* h)
return;
}
#endif

View File

@@ -15,8 +15,8 @@
__m128i ROUND_CONST_Lx;
__m128i ROUND_CONST_L0[ROUNDS512];
__m128i ROUND_CONST_L7[ROUNDS512];
__m128i ROUND_CONST_P[ROUNDS1024];
__m128i ROUND_CONST_Q[ROUNDS1024];
//__m128i ROUND_CONST_P[ROUNDS1024];
//__m128i ROUND_CONST_Q[ROUNDS1024];
__m128i TRANSP_MASK;
__m128i SUBSH_MASK[8];
__m128i ALL_1B;
@@ -351,9 +351,8 @@ __m128i ALL_FF;
}/**/
void INIT256(u64* h)
void INIT256( __m128i* chaining )
{
__m128i* const chaining = (__m128i*) h;
static __m128i xmm0, /*xmm1,*/ xmm2, /*xmm3, xmm4, xmm5,*/ xmm6, xmm7;
static __m128i /*xmm8, xmm9, xmm10, xmm11,*/ xmm12, xmm13, xmm14, xmm15;
@@ -374,10 +373,8 @@ void INIT256(u64* h)
chaining[3] = xmm7;
}
void TF512(u64* h, u64* m)
void TF512( __m128i* chaining, __m128i* message )
{
__m128i* const chaining = (__m128i*) h;
__m128i* const message = (__m128i*) m;
static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
static __m128i TEMP0;
@@ -449,9 +446,8 @@ void TF512(u64* h, u64* m)
return;
}
void OF512(u64* h)
void OF512( __m128i* chaining )
{
__m128i* const chaining = (__m128i*) h;
static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
static __m128i TEMP0;

View File

@@ -6,6 +6,9 @@
* This code is placed in the public domain
*/
// Optimized for hash and data length that are integrals of __m128i
#include <memory.h>
#include "hash-groestl.h"
#include "miner.h"
@@ -49,196 +52,191 @@
#endif
#endif
/* digest up to len bytes of input (full blocks only) */
void Transform( hashState_groestl *ctx, const u8 *in, unsigned long long len )
{
/* increment block counter */
ctx->block_counter += len/SIZE;
/* digest message, one block at a time */
for ( ; len >= SIZE; len -= SIZE, in += SIZE )
TF1024( (u64*)ctx->chaining, (u64*)in );
asm volatile ("emms");
}
/* given state h, do h <- P(h)+h */
void OutputTransformation( hashState_groestl *ctx )
{
/* determine variant */
OF1024( (u64*)ctx->chaining );
asm volatile ("emms");
}
/* initialise context */
HashReturn_gr init_groestl( hashState_groestl* ctx, int hashlen )
{
u8 i = 0;
int i;
ctx->hashlen = hashlen;
SET_CONSTANTS();
for ( i = 0; i < SIZE / 8; i++ )
ctx->chaining[i] = 0;
for ( i = 0; i < SIZE; i++ )
ctx->buffer[i] = 0;
if (ctx->chaining == NULL || ctx->buffer == NULL)
return FAIL_GR;
/* set initial value */
ctx->chaining[COLS-1] = U64BIG((u64)LENGTH);
for ( i = 0; i < SIZE512; i++ )
{
ctx->chaining[i] = _mm_setzero_si128();
ctx->buffer[i] = _mm_setzero_si128();
}
((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH);
INIT(ctx->chaining);
ctx->buf_ptr = 0;
ctx->block_counter = 0;
ctx->rem_ptr = 0;
return SUCCESS_GR;
}
/*
HashReturn_gr init_groestl( hashState_groestl* ctx )
{
return Xinit_groestl( ctx, 64 );
}
*/
HashReturn_gr reinit_groestl( hashState_groestl* ctx )
{
int i;
for ( i = 0; i < SIZE / 8; i++ )
ctx->chaining[i] = 0;
for ( i = 0; i < SIZE; i++ )
ctx->buffer[i] = 0;
if (ctx->chaining == NULL || ctx->buffer == NULL)
return FAIL_GR;
/* set initial value */
ctx->chaining[COLS-1] = U64BIG( (u64)LENGTH );
INIT( ctx->chaining );
for ( i = 0; i < SIZE512; i++ )
{
ctx->chaining[i] = _mm_setzero_si128();
ctx->buffer[i] = _mm_setzero_si128();
}
((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH);
INIT(ctx->chaining);
ctx->buf_ptr = 0;
ctx->block_counter = 0;
ctx->rem_ptr = 0;
return SUCCESS_GR;
}
//// midstate is broken
// To use midstate:
// 1. midstate must process all full blocks.
// 2. tail must be less than a full block and may not straddle a
// block boundary.
// 3. midstate and tail each must be multiples of 128 bits.
// 4. For best performance midstate length is a multiple of block size.
// 5. Midstate will work at reduced impact than full hash, if total hash
// (midstate + tail) is less than 1 block.
// This, unfortunately, is the case with all current users.
// 6. the morefull blocks the bigger the gain
/* update state with databitlen bits of input */
HashReturn_gr update_groestl( hashState_groestl* ctx,
const BitSequence_gr* input,
DataLength_gr databitlen )
// use only for midstate precalc
HashReturn_gr update_groestl( hashState_groestl* ctx, const void* input,
DataLength_gr databitlen )
{
int i;
const int msglen = (int)(databitlen/8);
__m128i* in = (__m128i*)input;
const int len = (int)databitlen / 128; // bits to __m128i
const int blocks = len / SIZE512; // __M128i to blocks
int rem = ctx->rem_ptr;
int i;
/* digest bulk of message */
Transform( ctx, input, msglen );
ctx->blk_count = blocks;
ctx->databitlen = databitlen;
/* store remaining data in buffer */
i = ( msglen / SIZE ) * SIZE;
while ( i < msglen )
ctx->buffer[(int)ctx->buf_ptr++] = input[i++];
// digest any full blocks
for ( i = 0; i < blocks; i++ )
TF1024( ctx->chaining, &in[ i * SIZE512 ] );
// adjust buf_ptr to last block
ctx->buf_ptr = blocks * SIZE512;
return SUCCESS_GR;
// copy any remaining data to buffer for final hash, it may already
// contain data from a previous update for a midstate precalc
for ( i = 0; i < len % SIZE512; i++ )
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
// adjust rem_ptr for possible new data
ctx->rem_ptr += i;
return SUCCESS_GR;
}
/* finalise: process remaining data (including padding), perform
output transformation, and write hash result to 'output' */
HashReturn_gr final_groestl( hashState_groestl* ctx,
BitSequence_gr* output )
// deprecated do not use
HashReturn_gr final_groestl( hashState_groestl* ctx, void* output )
{
int i, j;
const int len = (int)ctx->databitlen / 128; // bits to __m128i
const int blocks = ctx->blk_count + 1; // adjust for final block
ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
/* pad with '0'-bits */
if ( ctx->buf_ptr > SIZE - LENGTHFIELDLEN )
{
/* padding requires two blocks */
while ( ctx->buf_ptr < SIZE )
ctx->buffer[(int)ctx->buf_ptr++] = 0;
/* digest first padding block */
Transform( ctx, ctx->buffer, SIZE );
ctx->buf_ptr = 0;
}
const int rem_ptr = ctx->rem_ptr; // end of data start of padding
const int hashlen_m128i = ctx->hashlen / 16; // bytes to __m128i
const int hash_offset = SIZE512 - hashlen_m128i; // where in buffer
int i;
// this will pad up to 120 bytes
while ( ctx->buf_ptr < SIZE - LENGTHFIELDLEN )
ctx->buffer[(int)ctx->buf_ptr++] = 0;
// first pad byte = 0x80, last pad byte = block count
// everything in between is zero
/* length padding */
ctx->block_counter++;
ctx->buf_ptr = SIZE;
while ( ctx->buf_ptr > SIZE - LENGTHFIELDLEN )
{
ctx->buffer[(int)--ctx->buf_ptr] = (u8)ctx->block_counter;
ctx->block_counter >>= 8;
}
if ( rem_ptr == len - 1 )
{
// only 128 bits left in buffer, all padding at once
ctx->buffer[rem_ptr] = _mm_set_epi8( blocks,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 );
}
else
{
// add first padding
ctx->buffer[rem_ptr] = _mm_set_epi8( 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 );
// add zero padding
for ( i = rem_ptr + 1; i < SIZE512 - 1; i++ )
ctx->buffer[i] = _mm_setzero_si128();
/* digest final padding block */
Transform( ctx, ctx->buffer, SIZE );
/* perform output transformation */
OutputTransformation( ctx );
// add length padding, second last byte is zero unless blocks > 255
ctx->buffer[i] = _mm_set_epi8( blocks, blocks>>8, 0,0, 0,0,0,0,
0, 0 ,0,0, 0,0,0,0 );
}
// store hash result in output
for ( i = ( SIZE - ctx->hashlen) / 16, j = 0; i < SIZE / 16; i++, j++ )
casti_m128i( output, j ) = casti_m128i( ctx->chaining , i );
// digest final padding block and do output transform
TF1024( ctx->chaining, ctx->buffer );
OF1024( ctx->chaining );
return SUCCESS_GR;
// store hash result in output
for ( i = 0; i < hashlen_m128i; i++ )
casti_m128i( output, i ) = ctx->chaining[ hash_offset + i];
return SUCCESS_GR;
}
HashReturn_gr update_and_final_groestl( hashState_groestl* ctx,
BitSequence_gr* output, const BitSequence_gr* input,
DataLength_gr databitlen )
HashReturn_gr update_and_final_groestl( hashState_groestl* ctx, void* output,
const void* input, DataLength_gr databitlen )
{
const int inlen = (int)(databitlen/8); // need bytes
int i, j;
const int len = (int)databitlen / 128;
const int hashlen_m128i = ctx->hashlen / 16; // bytes to __m128i
const int hash_offset = SIZE512 - hashlen_m128i;
int rem = ctx->rem_ptr;
int blocks = len / SIZE512;
__m128i* in = (__m128i*)input;
int i;
/* digest bulk of message */
Transform( ctx, input, inlen );
// --- update ---
/* store remaining data in buffer */
i = ( inlen / SIZE ) * SIZE;
while ( i < inlen )
ctx->buffer[(int)ctx->buf_ptr++] = input[i++];
// digest any full blocks, process directly from input
for ( i = 0; i < blocks; i++ )
TF1024( ctx->chaining, &in[ i * SIZE512 ] );
ctx->buf_ptr = blocks * SIZE512;
// start of final
// copy any remaining data to buffer, it may already contain data
// from a previous update for a midstate precalc
for ( i = 0; i < len % SIZE512; i++ )
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
i += rem; // use i as rem_ptr in final
ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
//--- final ---
/* pad with '0'-bits */
if ( ctx->buf_ptr > SIZE - LENGTHFIELDLEN )
{
/* padding requires two blocks */
while ( ctx->buf_ptr < SIZE )
ctx->buffer[(int)ctx->buf_ptr++] = 0;
memset( ctx->buffer + ctx->buf_ptr, 0, SIZE - ctx->buf_ptr );
/* digest first padding block */
Transform( ctx, ctx->buffer, SIZE );
ctx->buf_ptr = 0;
}
blocks++; // adjust for final block
// this will pad up to 120 bytes
memset( ctx->buffer + ctx->buf_ptr, 0, SIZE - ctx->buf_ptr - LENGTHFIELDLEN );
if ( i == len -1 )
{
// only 128 bits left in buffer, all padding at once
ctx->buffer[i] = _mm_set_epi8( blocks,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 );
}
else
{
// add first padding
ctx->buffer[i] = _mm_set_epi8( 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 );
// add zero padding
for ( i += 1; i < SIZE512 - 1; i++ )
ctx->buffer[i] = _mm_setzero_si128();
/* length padding */
ctx->block_counter++;
ctx->buf_ptr = SIZE;
while (ctx->buf_ptr > SIZE - LENGTHFIELDLEN)
{
ctx->buffer[(int)--ctx->buf_ptr] = (u8)ctx->block_counter;
ctx->block_counter >>= 8;
}
// add length padding, second last byte is zero unless blocks > 255
ctx->buffer[i] = _mm_set_epi8( blocks, blocks>>8, 0,0, 0,0,0,0,
0, 0 ,0,0, 0,0,0,0 );
}
/* digest final padding block */
Transform( ctx, ctx->buffer, SIZE );
/* perform output transformation */
OutputTransformation( ctx );
// digest final padding block and do output transform
TF1024( ctx->chaining, ctx->buffer );
OF1024( ctx->chaining );
// store hash result in output
for ( i = ( SIZE - ctx->hashlen) / 16, j = 0; i < SIZE / 16; i++, j++ )
casti_m128i( output, j ) = casti_m128i( ctx->chaining , i );
// store hash result in output
for ( i = 0; i < hashlen_m128i; i++ )
casti_m128i( output, i ) = ctx->chaining[ hash_offset + i ];
return SUCCESS_GR;
return SUCCESS_GR;
}
/* hash bit sequence */

View File

@@ -9,6 +9,8 @@
#ifndef __hash_h
#define __hash_h
#include <immintrin.h>
#include <stdio.h>
#if defined(_WIN64) || defined(__WINDOWS__)
#include <windows.h>
@@ -19,27 +21,27 @@
#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"
#include "algo/sha/brg_types.h"
/* some sizes (number of bytes) */
#define ROWS (8)
#define LENGTHFIELDLEN (ROWS)
#define COLS512 (8)
//#define COLS512 (8)
#define COLS1024 (16)
#define SIZE512 ((ROWS)*(COLS512))
#define SIZE1024 ((ROWS)*(COLS1024))
#define ROUNDS512 (10)
//#define SIZE512 ((ROWS)*(COLS512))
#define SIZE_1024 ((ROWS)*(COLS1024))
//#define ROUNDS512 (10)
#define ROUNDS1024 (14)
#if LENGTH<=256
#define COLS (COLS512)
#define SIZE (SIZE512)
#define ROUNDS (ROUNDS512)
#else
//#if LENGTH<=256
//#define COLS (COLS512)
//#define SIZE (SIZE512)
//#define ROUNDS (ROUNDS512)
//#else
#define COLS (COLS1024)
#define SIZE (SIZE1024)
//#define SIZE (SIZE1024)
#define ROUNDS (ROUNDS1024)
#endif
//#endif
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))
@@ -61,31 +63,29 @@ typedef unsigned char BitSequence_gr;
typedef unsigned long long DataLength_gr;
typedef enum { SUCCESS_GR = 0, FAIL_GR = 1, BAD_HASHBITLEN_GR = 2} HashReturn_gr;
// Use area128 overlay for buffer to facilitate fast copying
#define SIZE512 (SIZE_1024/16)
typedef struct {
__attribute__ ((aligned (32))) u64 chaining[SIZE/8]; // actual state
__attribute__ ((aligned (32))) BitSequence_gr buffer[SIZE]; // data buffer
u64 block_counter; /* message block counter */
int hashlen; // bytes
int buf_ptr; /* data buffer pointer */
__attribute__ ((aligned (64))) __m128i chaining[SIZE512];
__attribute__ ((aligned (64))) __m128i buffer[SIZE512];
int hashlen; // byte
int blk_count; // SIZE_m128i
int buf_ptr; // __m128i offset
int rem_ptr;
int databitlen; // bits
} hashState_groestl;
//HashReturn_gr init_groestl( hashState_groestl* );
HashReturn_gr init_groestl( hashState_groestl*, int );
HashReturn_gr reinit_groestl( hashState_groestl* );
HashReturn_gr update_groestl( hashState_groestl*, const BitSequence_gr*,
HashReturn_gr update_groestl( hashState_groestl*, const void*,
DataLength_gr );
HashReturn_gr final_groestl( hashState_groestl*, BitSequence_gr* );
HashReturn_gr final_groestl( hashState_groestl*, void* );
HashReturn_gr hash_groestl( int, const BitSequence_gr*, DataLength_gr,
BitSequence_gr* );
HashReturn_gr update_and_final_groestl( hashState_groestl*,
BitSequence_gr*, const BitSequence_gr*, DataLength_gr );
HashReturn_gr update_and_final_groestl( hashState_groestl*, void*,
const void*, DataLength_gr );
#endif /* __hash_h */

View File

@@ -49,187 +49,201 @@
#endif
#endif
/* digest up to len bytes of input (full blocks only) */
void Transform256(hashState_groestl256 *ctx,
const u8 *in,
unsigned long long len) {
/* increment block counter */
ctx->block_counter += len/SIZE;
/* digest message, one block at a time */
for (; len >= SIZE; len -= SIZE, in += SIZE)
TF512((u64*)ctx->chaining, (u64*)in);
asm volatile ("emms");
}
/* given state h, do h <- P(h)+h */
void OutputTransformation256(hashState_groestl256 *ctx) {
/* determine variant */
OF512((u64*)ctx->chaining);
asm volatile ("emms");
}
/* initialise context */
HashReturn_gr init_groestl256( hashState_groestl256* ctx, int hashlen )
{
u8 i = 0;
int i;
ctx->hashlen = hashlen;
SET_CONSTANTS();
for (i=0; i<SIZE/8; i++)
ctx->chaining[i] = 0;
for (i=0; i<SIZE; i++)
ctx->buffer[i] = 0;
if (ctx->chaining == NULL || ctx->buffer == NULL)
return FAIL_GR;
/* set initial value */
ctx->chaining[COLS-1] = U64BIG((u64)256);
INIT256(ctx->chaining);
/* set other variables */
for ( i = 0; i < SIZE256; i++ )
{
ctx->chaining[i] = _mm_setzero_si128();
ctx->buffer[i] = _mm_setzero_si128();
}
((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH);
INIT256( ctx->chaining );
ctx->buf_ptr = 0;
ctx->block_counter = 0;
ctx->rem_ptr = 0;
return SUCCESS_GR;
}
HashReturn_gr reinit_groestl256(hashState_groestl256* ctx)
{
int i;
for (i=0; i<SIZE/8; i++)
ctx->chaining[i] = 0;
for (i=0; i<SIZE; i++)
ctx->buffer[i] = 0;
if (ctx->chaining == NULL || ctx->buffer == NULL)
return FAIL_GR;
/* set initial value */
ctx->chaining[COLS-1] = 256;
for ( i = 0; i < SIZE256; i++ )
{
ctx->chaining[i] = _mm_setzero_si128();
ctx->buffer[i] = _mm_setzero_si128();
}
((u64*)ctx->chaining)[COLS-1] = U64BIG((u64)LENGTH);
INIT256(ctx->chaining);
/* set other variables */
ctx->buf_ptr = 0;
ctx->block_counter = 0;
ctx->rem_ptr = 0;
return SUCCESS_GR;
}
HashReturn_gr update_groestl256( hashState_groestl256* ctx,
const BitSequence_gr* input, DataLength_gr databitlen )
// Use this only for midstate and never for cryptonight
HashReturn_gr update_groestl256( hashState_groestl256* ctx, const void* input,
DataLength_gr databitlen )
{
const int msglen = (int)(databitlen/8); // bytes
int i;
__m128i* in = (__m128i*)input;
const int len = (int)databitlen / 128; // bits to __m128i
const int blocks = len / SIZE256; // __M128i to blocks
int rem = ctx->rem_ptr;
int i;
/* digest bulk of message */
Transform256( ctx, input, msglen );
ctx->blk_count = blocks;
ctx->databitlen = databitlen;
/* store remaining data in buffer */
i = ( msglen / SIZE ) * SIZE;
while ( i < msglen )
ctx->buffer[(int)ctx->buf_ptr++] = input[i++];
// digest any full blocks
for ( i = 0; i < blocks; i++ )
TF512( ctx->chaining, &in[ i * SIZE256 ] );
// adjust buf_ptr to last block
ctx->buf_ptr = blocks * SIZE256;
return SUCCESS_GR;
// Copy any remainder to buffer
for ( i = 0; i < len % SIZE256; i++ )
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
// adjust rem_ptr for new data
ctx->rem_ptr += i;
return SUCCESS_GR;
}
HashReturn_gr final_groestl256( hashState_groestl256* ctx,
BitSequence_gr* output )
// don't use this at all
HashReturn_gr final_groestl256( hashState_groestl256* ctx, void* output )
{
ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
const int len = (int)ctx->databitlen / 128; // bits to __m128i
const int blocks = ctx->blk_count + 1; // adjust for final block
const int rem_ptr = ctx->rem_ptr; // end of data start of padding
const int hashlen_m128i = ctx->hashlen / 16; // bytes to __m128i
const int hash_offset = SIZE256 - hashlen_m128i; // where in buffer
int i;
/* pad with '0'-bits */
if ( ctx->buf_ptr > SIZE - LENGTHFIELDLEN )
{
/* padding requires two blocks */
while ( ctx->buf_ptr < SIZE )
ctx->buffer[(int)ctx->buf_ptr++] = 0;
/* digest first padding block */
Transform256( ctx, ctx->buffer, SIZE );
ctx->buf_ptr = 0;
}
while ( ctx->buf_ptr < SIZE - LENGTHFIELDLEN )
ctx->buffer[(int)ctx->buf_ptr++] = 0;
// first pad byte = 0x80, last pad byte = block count
// everything in between is zero
/* length padding */
ctx->block_counter++;
ctx->buf_ptr = SIZE;
while ( ctx->buf_ptr > SIZE - LENGTHFIELDLEN )
{
ctx->buffer[(int)--ctx->buf_ptr] = (u8)ctx->block_counter;
ctx->block_counter >>= 8;
}
if ( rem_ptr == len - 1 )
{
// all padding at once
ctx->buffer[rem_ptr] = _mm_set_epi8( blocks,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 );
}
else
{
// add first padding
ctx->buffer[rem_ptr] = _mm_set_epi8( 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 );
// add zero padding
for ( i = rem_ptr + 1; i < SIZE256 - 1; i++ )
ctx->buffer[i] = _mm_setzero_si128();
// add length padding
// cheat since we know the block count is trivial, good if block < 256
ctx->buffer[i] = _mm_set_epi8( blocks,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0 );
}
/* digest final padding block */
Transform256( ctx, ctx->buffer, SIZE );
/* perform output transformation */
OutputTransformation256( ctx );
// digest final padding block and do output transform
TF512( ctx->chaining, ctx->buffer );
OF512( ctx->chaining );
/* store hash result in output */
for ( int i = ( (SIZE - ctx->hashlen) / 16 ), j = 0; i < SIZE/16; i++, j++ )
casti_m128i( output, j ) = casti_m128i( ctx->chaining, i );
// store hash result in output
for ( i = 0; i < hashlen_m128i; i++ )
casti_m128i( output, i ) = ctx->chaining[ hash_offset + i];
return SUCCESS_GR;
return SUCCESS_GR;
}
HashReturn_gr update_and_final_groestl256( hashState_groestl256* ctx,
BitSequence_gr* output, const BitSequence_gr* input,
DataLength_gr databitlen )
void* output, const void* input, DataLength_gr databitlen )
{
const int msglen = (int)(databitlen/8); // bytes
int i, j;
const int len = (int)databitlen / 128;
const int hashlen_m128i = ctx->hashlen / 16; // bytes to __m128i
const int hash_offset = SIZE256 - hashlen_m128i;
int rem = ctx->rem_ptr;
int blocks = len / SIZE256;
__m128i* in = (__m128i*)input;
int i;
/* digest bulk of message */
Transform256( ctx, input, msglen );
// --- update ---
/* store remaining data in buffer */
i = ( msglen / SIZE ) * SIZE;
while ( i < msglen )
ctx->buffer[(int)ctx->buf_ptr++] = input[i++];
// digest any full blocks, process directly from input
for ( i = 0; i < blocks; i++ )
TF512( ctx->chaining, &in[ i * SIZE256 ] );
ctx->buf_ptr = blocks * SIZE256;
// start of final
ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
// cryptonight has 200 byte input, an odd number of __m128i
// remainder is only 8 bytes, ie u64.
if ( databitlen % 128 !=0 )
{
// must be cryptonight, copy 64 bits of data
*(uint64_t*)(ctx->buffer) = *(uint64_t*)(&in[ ctx->buf_ptr ] );
i = -1; // signal for odd length
}
else
{
// Copy any remaining data to buffer for final transform
for ( i = 0; i < len % SIZE256; i++ )
ctx->buffer[ rem + i ] = in[ ctx->buf_ptr + i ];
i += rem; // use i as rem_ptr in final
}
/* pad with '0'-bits */
if ( ctx->buf_ptr > SIZE - LENGTHFIELDLEN )
{
/* padding requires two blocks */
while ( ctx->buf_ptr < SIZE )
ctx->buffer[(int)ctx->buf_ptr++] = 0;
/* digest first padding block */
Transform256( ctx, ctx->buffer, SIZE );
ctx->buf_ptr = 0;
}
while ( ctx->buf_ptr < SIZE - LENGTHFIELDLEN )
ctx->buffer[(int)ctx->buf_ptr++] = 0;
//--- final ---
/* length padding */
ctx->block_counter++;
ctx->buf_ptr = SIZE;
while ( ctx->buf_ptr > SIZE - LENGTHFIELDLEN )
{
ctx->buffer[(int)--ctx->buf_ptr] = (u8)ctx->block_counter;
ctx->block_counter >>= 8;
}
// adjust for final block
blocks++;
/* digest final padding block */
Transform256( ctx, ctx->buffer, SIZE );
/* perform output transformation */
OutputTransformation256( ctx );
if ( i == len - 1 )
{
// all padding at once
ctx->buffer[i] = _mm_set_epi8( blocks,blocks>>8,0,0, 0,0,0,0,
0, 0,0,0, 0,0,0,0x80 );
}
else
{
if ( i == -1 )
{
// cryptonight odd length
((uint64_t*)ctx->buffer)[ 1 ] = 0x80ull;
// finish the block with zero and length padding as normal
i = 0;
}
else
{
// add first padding
ctx->buffer[i] = _mm_set_epi8( 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 );
}
// add zero padding
for ( i += 1; i < SIZE256 - 1; i++ )
ctx->buffer[i] = _mm_setzero_si128();
// add length padding
// cheat since we know the block count is trivial, good if block < 256
ctx->buffer[i] = _mm_set_epi8( blocks,blocks>>8,0,0, 0,0,0,0,
0, 0,0,0, 0,0,0,0 );
}
/* store hash result in output */
for ( i = ( (SIZE - ctx->hashlen) / 16 ), j = 0; i < SIZE/16; i++, j++ )
casti_m128i( output, j ) = casti_m128i( ctx->chaining, i );
// digest final padding block and do output transform
TF512( ctx->chaining, ctx->buffer );
OF512( ctx->chaining );
return SUCCESS_GR;
// store hash result in output
for ( i = 0; i < hashlen_m128i; i++ )
casti_m128i( output, i ) = ctx->chaining[ hash_offset + i ];
return SUCCESS_GR;
}
/* hash bit sequence */

View File

@@ -9,6 +9,7 @@
#ifndef __hash_h
#define __hash_h
#include <immintrin.h>
#include <stdio.h>
#if defined(_WIN64) || defined(__WINDOWS__)
#include <windows.h>
@@ -34,29 +35,27 @@ typedef crypto_uint64 u64;
#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"
#include "algo/sha/brg_types.h"
#ifdef IACA_TRACE
#include IACA_MARKS
#endif
//#ifndef LENGTH
//#define LENGTH (256)
//#endif
#define LENGTH (256)
/* some sizes (number of bytes) */
#define ROWS (8)
#define LENGTHFIELDLEN (ROWS)
#define COLS512 (8)
#define COLS1024 (16)
#define SIZE512 ((ROWS)*(COLS512))
#define SIZE1024 ((ROWS)*(COLS1024))
//#define COLS1024 (16)
#define SIZE_512 ((ROWS)*(COLS512))
//#define SIZE1024 ((ROWS)*(COLS1024))
#define ROUNDS512 (10)
#define ROUNDS1024 (14)
//#define ROUNDS1024 (14)
//#if LENGTH<=256
#define COLS (COLS512)
#define SIZE (SIZE512)
//#define SIZE (SIZE512)
#define ROUNDS (ROUNDS512)
//#else
//#define COLS (COLS1024)
@@ -89,28 +88,34 @@ typedef enum
BAD_HASHBITLEN_GR = 2
} HashReturn_gr;
#define SIZE256 (SIZE_512/16)
typedef struct {
__attribute__ ((aligned (32))) u64 chaining[SIZE/8]; /* actual state */
__attribute__ ((aligned (32))) BitSequence_gr buffer[SIZE]; /* data buffer */
u64 block_counter; /* message block counter */
__attribute__ ((aligned (32))) __m128i chaining[SIZE256];
__attribute__ ((aligned (32))) __m128i buffer[SIZE256];
// __attribute__ ((aligned (32))) u64 chaining[SIZE/8]; /* actual state */
// __attribute__ ((aligned (32))) BitSequence_gr buffer[SIZE]; /* data buffer */
// u64 block_counter; /* message block counter */
int hashlen; // bytes
int blk_count;
int buf_ptr; /* data buffer pointer */
int rem_ptr;
int databitlen;
} hashState_groestl256;
HashReturn_gr init_groestl256( hashState_groestl256*, int );
HashReturn_gr reinit_groestl( hashState_groestl256* );
HashReturn_gr reinit_groestl256( hashState_groestl256* );
HashReturn_gr update_groestl( hashState_groestl256*, const BitSequence_gr*,
HashReturn_gr update_groestl256( hashState_groestl256*, const void*,
DataLength_gr );
HashReturn_gr final_groestl( hashState_groestl256*, BitSequence_gr* );
HashReturn_gr final_groestl256( hashState_groestl256*, void* );
HashReturn_gr hash_groestl( int, const BitSequence_gr*, DataLength_gr,
HashReturn_gr hash_groestli256( int, const BitSequence_gr*, DataLength_gr,
BitSequence_gr* );
HashReturn_gr update_and_final_groestl256( hashState_groestl256*,
BitSequence_gr*, const BitSequence_gr*,
DataLength_gr );
HashReturn_gr update_and_final_groestl256( hashState_groestl256*, void*,
const void*, DataLength_gr );
#endif /* __hash_h */

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdio.h>
@@ -15,7 +14,7 @@
typedef struct
{
#ifdef NO_AES_NI
sph_groestl512_context groestl;
sph_groestl512_context groestl1, groestl2;
#else
hashState_groestl groestl1, groestl2;
#endif
@@ -27,43 +26,42 @@ static groestl_ctx_holder groestl_ctx;
void init_groestl_ctx()
{
#ifdef NO_AES_NI
sph_groestl512_init( &groestl_ctx.groestl );
sph_groestl512_init( &groestl_ctx.groestl1 );
sph_groestl512_init( &groestl_ctx.groestl2 );
#else
init_groestl( &groestl_ctx.groestl1, 64 );
init_groestl( &groestl_ctx.groestl2, 64 );
#endif
}
void groestlhash(void *output, const void *input)
void groestlhash( void *output, const void *input )
{
uint32_t _ALIGN(32) hash[16];
groestl_ctx_holder ctx;
uint32_t hash[16] __attribute__ ((aligned (64)));
groestl_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &groestl_ctx, sizeof(groestl_ctx) );
// memset(&hash[0], 0, sizeof(hash));
#ifdef NO_AES_NI
sph_groestl512(&ctx.groestl, input, 80);
sph_groestl512_close(&ctx.groestl, hash);
sph_groestl512(&ctx.groestl1, input, 80);
sph_groestl512_close(&ctx.groestl1, hash);
sph_groestl512(&ctx.groestl, hash, 64);
sph_groestl512_close(&ctx.groestl, hash);
sph_groestl512(&ctx.groestl2, hash, 64);
sph_groestl512_close(&ctx.groestl2, hash);
#else
update_groestl( &ctx.groestl1, (char*)input, 640 );
final_groestl( &ctx.groestl1,(char*)hash);
update_and_final_groestl( &ctx.groestl1, (char*)hash,
(const char*)input, 640 );
update_groestl( &ctx.groestl2, (char*)hash, 512 );
final_groestl( &ctx.groestl2, (char*)hash);
update_and_final_groestl( &ctx.groestl2, (char*)hash,
(const char*)hash, 512 );
#endif
memcpy(output, hash, 32);
memcpy(output, hash, 32);
}
int scanhash_groestl(int thr_id, struct work *work,
uint32_t max_nonce, uint64_t *hashes_done)
int scanhash_groestl( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t _ALIGN(64) endiandata[20];
uint32_t endiandata[20] __attribute__ ((aligned (64)));
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
@@ -74,7 +72,7 @@ int scanhash_groestl(int thr_id, struct work *work,
do {
const uint32_t Htarg = ptarget[7];
uint32_t hash[8];
uint32_t hash[8] __attribute__ ((aligned (64)));
be32enc(&endiandata[19], nonce);
groestlhash(hash, endiandata);
@@ -100,16 +98,21 @@ void groestl_set_target( struct work* work, double job_diff )
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
bool register_groestl_algo( algo_gate_t* gate )
bool register_dmd_gr_algo( algo_gate_t* gate )
{
init_groestl_ctx();
gate->optimizations = SSE2_OPT | AES_OPT;
gate->scanhash = (void*)&scanhash_groestl;
gate->hash = (void*)&groestlhash;
gate->hash_alt = (void*)&groestlhash;
gate->set_target = (void*)&groestl_set_target;
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};
bool register_groestl_algo( algo_gate_t* gate )
{
register_dmd_gr_algo( gate );
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
return true;
};

View File

@@ -1,5 +1,4 @@
#include "miner.h"
#include "algo-gate-api.h"
#include "myrgr-gate.h"
#include <stdio.h>
#include <stdlib.h>
@@ -11,7 +10,7 @@
#else
#include "aes_ni/hash-groestl.h"
#endif
#include "algo/sha3/sph_sha2.h"
#include "algo/sha/sph_sha2.h"
typedef struct {
#ifdef NO_AES_NI
@@ -34,7 +33,7 @@ void init_myrgr_ctx()
sph_sha256_init(&myrgr_ctx.sha);
}
void myriadhash(void *output, const void *input)
void myriad_hash(void *output, const void *input)
{
myrgr_ctx_holder ctx;
memcpy( &ctx, &myrgr_ctx, sizeof(myrgr_ctx) );
@@ -74,7 +73,7 @@ int scanhash_myriad(int thr_id, struct work *work,
const uint32_t Htarg = ptarget[7];
uint32_t hash[8];
be32enc(&endiandata[19], nonce);
myriadhash(hash, endiandata);
myriad_hash(hash, endiandata);
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
pdata[19] = nonce;
@@ -89,15 +88,15 @@ int scanhash_myriad(int thr_id, struct work *work,
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
/*
bool register_myriad_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT;
init_myrgr_ctx();
gate->scanhash = (void*)&scanhash_myriad;
gate->hash = (void*)&myriadhash;
gate->hash_alt = (void*)&myriadhash;
// gate->hash_alt = (void*)&myriadhash;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};
*/

134
algo/groestl/myrgr-4way.c Normal file
View File

@@ -0,0 +1,134 @@
#include "myrgr-gate.h"
#if defined(MYRGR_4WAY)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "aes_ni/hash-groestl.h"
#include "algo/sha/sha2-hash-4way.h"
typedef struct {
hashState_groestl groestl;
sha256_4way_context sha;
} myrgr_4way_ctx_holder;
myrgr_4way_ctx_holder myrgr_4way_ctx;
void init_myrgr_4way_ctx()
{
init_groestl (&myrgr_4way_ctx.groestl, 64 );
sha256_4way_init( &myrgr_4way_ctx.sha );
}
void myriad_4way_hash( void *output, const void *input )
{
uint32_t hash0[20] __attribute__ ((aligned (64)));
uint32_t hash1[20] __attribute__ ((aligned (64)));
uint32_t hash2[20] __attribute__ ((aligned (64)));
uint32_t hash3[20] __attribute__ ((aligned (64)));
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
myrgr_4way_ctx_holder ctx;
memcpy( &ctx, &myrgr_4way_ctx, sizeof(myrgr_4way_ctx) );
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, input, 640 );
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 640 );
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 640 );
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 640 );
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 640 );
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
sha256_4way( &ctx.sha, vhash, 64 );
sha256_4way_close( &ctx.sha, vhash );
mm_deinterleave_4x32( output, output+32, output+64, output+96,
vhash, 256 );
}
int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t _ALIGN(64) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;
uint32_t *noncep0 = vdata + 76; // 19*4
uint32_t *noncep1 = vdata + 77;
uint32_t *noncep2 = vdata + 78;
uint32_t *noncep3 = vdata + 79;
/*
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t _ALIGN(64) endiandata[20];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
*/
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;
swab32_array( edata, pdata, 20 );
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep0, n );
be32enc( noncep1, n+1 );
be32enc( noncep2, n+2 );
be32enc( noncep3, n+3 );
myriad_4way_hash( hash, vdata );
pdata[19] = n;
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = pdata[19] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= Htarg && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= Htarg && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= Htarg && fulltest( hash+24, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce-4)
&& !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
return num_found;
}
#endif

18
algo/groestl/myrgr-gate.c Normal file
View File

@@ -0,0 +1,18 @@
#include "myrgr-gate.h"
bool register_myriad_algo( algo_gate_t* gate )
{
#if defined (MYRGR_4WAY)
init_myrgr_4way_ctx();
gate->scanhash = (void*)&scanhash_myriad_4way;
gate->hash = (void*)&myriad_4way_hash;
#else
init_myrgr_ctx();
gate->scanhash = (void*)&scanhash_myriad;
gate->hash = (void*)&myriad_hash;
#endif
gate->optimizations = AES_OPT | AVX2_OPT;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

30
algo/groestl/myrgr-gate.h Normal file
View File

@@ -0,0 +1,30 @@
#ifndef MYRGR_GATE_H__
#define MYRGR_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
#if defined(__AVX2__) && defined(__AES__)
#define MYRGR_4WAY
#endif
#if defined(MYRGR_4WAY)
void myriad_4way_hash( void *state, const void *input );
int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
void init_myrgr_4way_ctx();
#endif
void myriad_hash( void *state, const void *input );
int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
void init_myrgr_ctx();
#endif

View File

@@ -40,7 +40,7 @@ extern "C"{
#endif
#include <stddef.h>
#include "algo/sha3/sph_types.h"
#include "algo/sha/sph_types.h"
/**
* Output size (in bits) for Groestl-224.

View File

@@ -1,133 +0,0 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
LICENSE TERMS
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
*/
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
#endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( _LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if !defined(PLATFORM_BYTE_ORDER)
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#else
# error Please edit lines 126 or 128 in brg_endian.h to set the platform byte order
#endif
#endif
#endif

View File

@@ -1,231 +0,0 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
(a few lines added by Soeren S. Thomsen, October 2008)
LICENSE TERMS
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
The unsigned integer types defined here are of the form uint_<nn>t where
<nn> is the length of the type; for example, the unsigned 32-bit type is
'uint_32t'. These are NOT the same as the 'C99 integer types' that are
defined in the inttypes.h and stdint.h headers since attempts to use these
types have shown that support for them is still highly variable. However,
since the latter are of the form uint<nn>_t, a regular expression search
and replace (in VC++ search on 'uint_{:z}t' and replace with 'uint\1_t')
can be used to convert the types used here to the C99 standard types.
*/
#ifndef _BRG_TYPES_H
#define _BRG_TYPES_H
#if defined(__cplusplus)
extern "C" {
#endif
#include <limits.h>
#if defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
# include <stddef.h>
# define ptrint_t intptr_t
#elif defined( __GNUC__ ) && ( __GNUC__ >= 3 )
# include <stdint.h>
# define ptrint_t intptr_t
#else
# define ptrint_t int
#endif
#ifndef BRG_UI8
# define BRG_UI8
# if UCHAR_MAX == 255u
typedef unsigned char uint_8t;
# else
# error Please define uint_8t as an 8-bit unsigned integer type in brg_types.h
# endif
#endif
#ifndef BRG_UI16
# define BRG_UI16
# if USHRT_MAX == 65535u
typedef unsigned short uint_16t;
# else
# error Please define uint_16t as a 16-bit unsigned short type in brg_types.h
# endif
#endif
#ifndef BRG_UI32
# define BRG_UI32
# if UINT_MAX == 4294967295u
# define li_32(h) 0x##h##u
typedef unsigned int uint_32t;
# elif ULONG_MAX == 4294967295u
# define li_32(h) 0x##h##ul
typedef unsigned long uint_32t;
# elif defined( _CRAY )
# error This code needs 32-bit data types, which Cray machines do not provide
# else
# error Please define uint_32t as a 32-bit unsigned integer type in brg_types.h
# endif
#endif
#ifndef BRG_UI64
# if defined( __BORLANDC__ ) && !defined( __MSDOS__ )
# define BRG_UI64
# define li_64(h) 0x##h##ui64
typedef unsigned __int64 uint_64t;
# elif defined( _MSC_VER ) && ( _MSC_VER < 1300 ) /* 1300 == VC++ 7.0 */
# define BRG_UI64
# define li_64(h) 0x##h##ui64
typedef unsigned __int64 uint_64t;
# elif defined( __sun ) && defined( ULONG_MAX ) && ULONG_MAX == 0xfffffffful
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# elif defined( __MVS__ )
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned int long long uint_64t;
# elif defined( UINT_MAX ) && UINT_MAX > 4294967295u
# if UINT_MAX == 18446744073709551615u
# define BRG_UI64
# define li_64(h) 0x##h##u
typedef unsigned int uint_64t;
# endif
# elif defined( ULONG_MAX ) && ULONG_MAX > 4294967295u
# if ULONG_MAX == 18446744073709551615ul
# define BRG_UI64
# define li_64(h) 0x##h##ul
typedef unsigned long uint_64t;
# endif
# elif defined( ULLONG_MAX ) && ULLONG_MAX > 4294967295u
# if ULLONG_MAX == 18446744073709551615ull
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# endif
# elif defined( ULONG_LONG_MAX ) && ULONG_LONG_MAX > 4294967295u
# if ULONG_LONG_MAX == 18446744073709551615ull
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# endif
# endif
#endif
#if !defined( BRG_UI64 )
# if defined( NEED_UINT_64T )
# error Please define uint_64t as an unsigned 64 bit type in brg_types.h
# endif
#endif
#ifndef RETURN_VALUES
# define RETURN_VALUES
# if defined( DLL_EXPORT )
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
# define VOID_RETURN __declspec( dllexport ) void __stdcall
# define INT_RETURN __declspec( dllexport ) int __stdcall
# elif defined( __GNUC__ )
# define VOID_RETURN __declspec( __dllexport__ ) void
# define INT_RETURN __declspec( __dllexport__ ) int
# else
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
# endif
# elif defined( DLL_IMPORT )
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
# define VOID_RETURN __declspec( dllimport ) void __stdcall
# define INT_RETURN __declspec( dllimport ) int __stdcall
# elif defined( __GNUC__ )
# define VOID_RETURN __declspec( __dllimport__ ) void
# define INT_RETURN __declspec( __dllimport__ ) int
# else
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
# endif
# elif defined( __WATCOMC__ )
# define VOID_RETURN void __cdecl
# define INT_RETURN int __cdecl
# else
# define VOID_RETURN void
# define INT_RETURN int
# endif
#endif
/* These defines are used to detect and set the memory alignment of pointers.
Note that offsets are in bytes.
ALIGN_OFFSET(x,n) return the positive or zero offset of
the memory addressed by the pointer 'x'
from an address that is aligned on an
'n' byte boundary ('n' is a power of 2)
ALIGN_FLOOR(x,n) return a pointer that points to memory
that is aligned on an 'n' byte boundary
and is not higher than the memory address
pointed to by 'x' ('n' is a power of 2)
ALIGN_CEIL(x,n) return a pointer that points to memory
that is aligned on an 'n' byte boundary
and is not lower than the memory address
pointed to by 'x' ('n' is a power of 2)
*/
#define ALIGN_OFFSET(x,n) (((ptrint_t)(x)) & ((n) - 1))
#define ALIGN_FLOOR(x,n) ((uint_8t*)(x) - ( ((ptrint_t)(x)) & ((n) - 1)))
#define ALIGN_CEIL(x,n) ((uint_8t*)(x) + (-((ptrint_t)(x)) & ((n) - 1)))
/* These defines are used to declare buffers in a way that allows
faster operations on longer variables to be used. In all these
defines 'size' must be a power of 2 and >= 8. NOTE that the
buffer size is in bytes but the type length is in bits
UNIT_TYPEDEF(x,size) declares a variable 'x' of length
'size' bits
BUFR_TYPEDEF(x,size,bsize) declares a buffer 'x' of length 'bsize'
bytes defined as an array of variables
each of 'size' bits (bsize must be a
multiple of size / 8)
UNIT_CAST(x,size) casts a variable to a type of
length 'size' bits
UPTR_CAST(x,size) casts a pointer to a pointer to a
varaiable of length 'size' bits
*/
#define UI_TYPE(size) uint_##size##t
#define UNIT_TYPEDEF(x,size) typedef UI_TYPE(size) x
#define BUFR_TYPEDEF(x,size,bsize) typedef UI_TYPE(size) x[bsize / (size >> 3)]
#define UNIT_CAST(x,size) ((UI_TYPE(size) )(x))
#define UPTR_CAST(x,size) ((UI_TYPE(size)*)(x))
/* Added by Soeren S. Thomsen (begin) */
#define u8 uint_8t
#define u32 uint_32t
#define u64 uint_64t
/* (end) */
#if defined(__cplusplus)
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,10 +0,0 @@
#ifndef GRSOASM_H
#define GRSOASM_H
#include "grso.h"
void grsoP1024ASM (u64 *x) ;
void grsoQ1024ASM (u64 *x) ;
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +0,0 @@
#ifndef GRSOASM_H
#define GRSOASM_H
/* really same as the mmx asm.h */
/* made just in case something must be changed */
#include "grso.h"
void grsoP1024ASM (u64 *x) ;
void grsoQ1024ASM (u64 *x) ;
#endif

View File

@@ -1,110 +0,0 @@
/* hash.c January 2011
*
* Groestl-512 implementation with inline assembly containing mmx and
* sse instructions. Optimized for Opteron.
* Authors: Krystian Matusiewicz and Soeren S. Thomsen
*
* This code is placed in the public domain
*/
//#include "grso.h"
//#include "grso-asm.h"
// #include "grsotab.h"
#define DECL_GRS
/* load initial constants */
#define GRS_I \
do { \
int i; \
/* set initial value */ \
for (i = 0; i < grsoCOLS-1; i++) sts_grs.grsstate[i] = 0; \
sts_grs.grsstate[grsoCOLS-1] = grsoU64BIG((u64)(8*grsoDIGESTSIZE)); \
\
/* set other variables */ \
sts_grs.grsbuf_ptr = 0; \
sts_grs.grsblock_counter = 0; \
} while (0); \
/* load hash */
#define GRS_U \
do { \
unsigned char* in = hash; \
unsigned long long index = 0; \
\
/* if the buffer contains data that has not yet been digested, first \
add data to buffer until full */ \
if (sts_grs.grsbuf_ptr) { \
while (sts_grs.grsbuf_ptr < grsoSIZE && index < 64) { \
hashbuf[(int)sts_grs.grsbuf_ptr++] = in[index++]; \
} \
if (sts_grs.grsbuf_ptr < grsoSIZE) continue; \
\
/* digest buffer */ \
sts_grs.grsbuf_ptr = 0; \
grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
} \
\
/* digest bulk of message */ \
grsoTransform(&sts_grs, in+index, 64-index); \
index += ((64-index)/grsoSIZE)*grsoSIZE; \
\
/* store remaining data in buffer */ \
while (index < 64) { \
hashbuf[(int)sts_grs.grsbuf_ptr++] = in[index++]; \
} \
\
} while (0);
/* groestl512 hash loaded */
/* hash = groestl512(loaded) */
#define GRS_C \
do { \
char *out = hash; \
int i, j = 0; \
unsigned char *s = (unsigned char*)sts_grs.grsstate; \
\
hashbuf[sts_grs.grsbuf_ptr++] = 0x80; \
\
/* pad with '0'-bits */ \
if (sts_grs.grsbuf_ptr > grsoSIZE-grsoLENGTHFIELDLEN) { \
/* padding requires two blocks */ \
while (sts_grs.grsbuf_ptr < grsoSIZE) { \
hashbuf[sts_grs.grsbuf_ptr++] = 0; \
} \
/* digest first padding block */ \
grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
sts_grs.grsbuf_ptr = 0; \
} \
while (sts_grs.grsbuf_ptr < grsoSIZE-grsoLENGTHFIELDLEN) { \
hashbuf[sts_grs.grsbuf_ptr++] = 0; \
} \
\
/* length padding */ \
sts_grs.grsblock_counter++; \
sts_grs.grsbuf_ptr = grsoSIZE; \
while (sts_grs.grsbuf_ptr > grsoSIZE-grsoLENGTHFIELDLEN) { \
hashbuf[--sts_grs.grsbuf_ptr] = (unsigned char)sts_grs.grsblock_counter; \
sts_grs.grsblock_counter >>= 8; \
} \
\
/* digest final padding block */ \
grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
/* perform output transformation */ \
grsoOutputTransformation(&sts_grs); \
\
/* store hash result in output */ \
for (i = grsoSIZE-grsoDIGESTSIZE; i < grsoSIZE; i++,j++) { \
out[j] = s[i]; \
} \
\
/* zeroise relevant variables and deallocate memory */ \
for (i = 0; i < grsoCOLS; i++) { \
sts_grs.grsstate[i] = 0; \
} \
for (i = 0; i < grsoSIZE; i++) { \
hashbuf[i] = 0; \
} \
} while (0);

View File

@@ -1,57 +0,0 @@
/* hash.c January 2011
*
* Groestl-512 implementation with inline assembly containing mmx and
* sse instructions. Optimized for Opteron.
* Authors: Krystian Matusiewicz and Soeren S. Thomsen
*
* This code is placed in the public domain
*/
#include "algo/groestl/sse2/grso-asm.h"
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grsotab.h"
/* digest up to len bytes of input (full blocks only) */
void grsoTransform(grsoState *ctx,
const unsigned char *in,
unsigned long long len) {
u64 y[grsoCOLS+2] __attribute__ ((aligned (16)));
u64 z[grsoCOLS+2] __attribute__ ((aligned (16)));
u64 *m, *h = (u64*)ctx->grsstate;
int i;
/* increment block counter */
ctx->grsblock_counter += len/grsoSIZE;
/* digest message, one block at a time */
for (; len >= grsoSIZE; len -= grsoSIZE, in += grsoSIZE) {
m = (u64*)in;
for (i = 0; i < grsoCOLS; i++) {
y[i] = m[i];
z[i] = m[i] ^ h[i];
}
grsoQ1024ASM(y);
grsoP1024ASM(z);
/* h' == h + Q(m) + P(h+m) */
for (i = 0; i < grsoCOLS; i++) {
h[i] ^= z[i] ^ y[i];
}
}
}
/* given state h, do h <- P(h)+h */
void grsoOutputTransformation(grsoState *ctx) {
u64 z[grsoCOLS] __attribute__ ((aligned (16)));
int j;
for (j = 0; j < grsoCOLS; j++) {
z[j] = ctx->grsstate[j];
}
grsoP1024ASM(z);
for (j = 0; j < grsoCOLS; j++) {
ctx->grsstate[j] ^= z[j];
}
}

View File

@@ -1,62 +0,0 @@
#ifndef __hash_h
#define __hash_h
#include <stdio.h>
#include <stdlib.h>
#include "brg_endian.h"
#include "brg_types.h"
/* some sizes (number of bytes) */
#define grsoROWS 8
#define grsoLENGTHFIELDLEN grsoROWS
#define grsoCOLS 16
#define grsoSIZE (grsoROWS*grsoCOLS)
#define grsoDIGESTSIZE 64
#define grsoROUNDS 14
#define grsoROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&((u64)0xffffffffffffffffULL))
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#error
#endif /* IS_BIG_ENDIAN */
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
#define grsoU64BIG(a) \
((grsoROTL64(a, 8) & ((u64)0x000000ff000000ffULL)) | \
(grsoROTL64(a,24) & ((u64)0x0000ff000000ff00ULL)) | \
(grsoROTL64(a,40) & ((u64)0x00ff000000ff0000ULL)) | \
(grsoROTL64(a,56) & ((u64)0xff000000ff000000ULL)))
#endif /* IS_LITTLE_ENDIAN */
typedef struct {
u64 grsstate[grsoCOLS]; /* actual state */
u64 grsblock_counter; /* message block counter */
int grsbuf_ptr; /* data buffer pointer */
} grsoState;
//extern int grsoInit(grsoState* ctx);
//extern int grsoUpdate(grsoState* ctx, const unsigned char* in,
// unsigned long long len);
//extern int grsoUpdateq(grsoState* ctx, const unsigned char* in);
//extern int grsoFinal(grsoState* ctx,
// unsigned char* out);
//
//extern int grsohash(unsigned char *out,
// const unsigned char *in,
// unsigned long long len);
/* digest up to len bytes of input (full blocks only) */
void grsoTransform( grsoState *ctx, const unsigned char *in,
unsigned long long len );
/* given state h, do h <- P(h)+h */
void grsoOutputTransformation( grsoState *ctx );
int grso_init ( grsoState* sts_grs );
int grso_update ( grsoState* sts_grs, char* hashbuf, char* hash );
int grso_close ( grsoState *sts_grs, char* hashbuf, char* hash );
#endif /* __hash_h */

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,935 @@
/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
/*
* Hamsi implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
//#include "miner.h"
#include "hamsi-hash-4way.h"
#if defined(__AVX2__)
#ifdef __cplusplus
extern "C"{
#endif
/*
* The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
* table lookup during message expansion (1 to 8, inclusive). If we note
* w the number of bits per message word (w=32 for Hamsi-224/256, w=64
* for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
* Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
* then we will get t tables (where t=ceil(w/n)) of individual size
* 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
* n=5, there are 7 tables, but the last one uses only two bits on
* input, not five).
*
* Also, we read t rows of r words from RAM. Words in a given row are
* concatenated in RAM in that order, so most of the cost is about
* reading the first row word; comparatively, cache misses are thus
* less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
*
* When n=1, tables are "special" in that we omit the first entry of
* each table (which always contains 0), so that total table size is
* halved.
*
* We thus have the following (size1 is the cumulative table size of
* Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
* are for Hamsi-224/256 and Hamsi-384/512, respectively).
*
* n size1 size2 t1 t2
* ---------------------------------------
* 1 1024 4096 32 64
* 2 2048 8192 16 32
* 3 2688 10880 11 22
* 4 4096 16384 8 16
* 5 6272 25600 7 13
* 6 10368 41984 6 11
* 7 16896 73856 5 10
* 8 32768 131072 4 8
*
* So there is a trade-off: a lower n makes the tables fit better in
* L1 cache, but increases the number of memory accesses. The optimal
* value depends on the amount of available L1 cache and the relative
* impact of a cache miss.
*
* Experimentally, in ideal benchmark conditions (which are not necessarily
* realistic with regards to L1 cache contention), it seems that n=8 is
* the best value on "big" architectures (those with 32 kB or more of L1
* cache), while n=4 is better on "small" architectures. This was tested
* on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
* (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
* (8 kB L1 cache).
*
* Note: with n=1, the 32 tables (actually implemented as one big table)
* are read entirely and sequentially, regardless of the input data,
* thus avoiding any data-dependent table access pattern.
*/
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
//#include "hamsi-helper-4way.c"
static const sph_u32 IV512[] = {
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
SPH_C32(0x6769756d)
};
static const sph_u32 alpha_n[] = {
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
};
static const sph_u32 alpha_f[] = {
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
};
// imported from hamsi helper
/* Note: this table lists bits within each byte from least
siginificant to most significant. */
static const sph_u32 T512[64][16] = {
{ SPH_C32(0xef0b0270), SPH_C32(0x3afd0000), SPH_C32(0x5dae0000),
SPH_C32(0x69490000), SPH_C32(0x9b0f3c06), SPH_C32(0x4405b5f9),
SPH_C32(0x66140a51), SPH_C32(0x924f5d0a), SPH_C32(0xc96b0030),
SPH_C32(0xe7250000), SPH_C32(0x2f840000), SPH_C32(0x264f0000),
SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137), SPH_C32(0x509f6984),
SPH_C32(0x9e69af68) },
{ SPH_C32(0xc96b0030), SPH_C32(0xe7250000), SPH_C32(0x2f840000),
SPH_C32(0x264f0000), SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137),
SPH_C32(0x509f6984), SPH_C32(0x9e69af68), SPH_C32(0x26600240),
SPH_C32(0xddd80000), SPH_C32(0x722a0000), SPH_C32(0x4f060000),
SPH_C32(0x936667ff), SPH_C32(0x29f944ce), SPH_C32(0x368b63d5),
SPH_C32(0x0c26f262) },
{ SPH_C32(0x145a3c00), SPH_C32(0xb9e90000), SPH_C32(0x61270000),
SPH_C32(0xf1610000), SPH_C32(0xce613d6c), SPH_C32(0xb0493d78),
SPH_C32(0x47a96720), SPH_C32(0xe18e24c5), SPH_C32(0x23671400),
SPH_C32(0xc8b90000), SPH_C32(0xf4c70000), SPH_C32(0xfb750000),
SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549), SPH_C32(0x02c40a3f),
SPH_C32(0xdc24e61f) },
{ SPH_C32(0x23671400), SPH_C32(0xc8b90000), SPH_C32(0xf4c70000),
SPH_C32(0xfb750000), SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549),
SPH_C32(0x02c40a3f), SPH_C32(0xdc24e61f), SPH_C32(0x373d2800),
SPH_C32(0x71500000), SPH_C32(0x95e00000), SPH_C32(0x0a140000),
SPH_C32(0xbdac1909), SPH_C32(0x48ef9831), SPH_C32(0x456d6d1f),
SPH_C32(0x3daac2da) },
{ SPH_C32(0x54285c00), SPH_C32(0xeaed0000), SPH_C32(0xc5d60000),
SPH_C32(0xa1c50000), SPH_C32(0xb3a26770), SPH_C32(0x94a5c4e1),
SPH_C32(0x6bb0419d), SPH_C32(0x551b3782), SPH_C32(0x9cbb1800),
SPH_C32(0xb0d30000), SPH_C32(0x92510000), SPH_C32(0xed930000),
SPH_C32(0x593a4345), SPH_C32(0xe114d5f4), SPH_C32(0x430633da),
SPH_C32(0x78cace29) },
{ SPH_C32(0x9cbb1800), SPH_C32(0xb0d30000), SPH_C32(0x92510000),
SPH_C32(0xed930000), SPH_C32(0x593a4345), SPH_C32(0xe114d5f4),
SPH_C32(0x430633da), SPH_C32(0x78cace29), SPH_C32(0xc8934400),
SPH_C32(0x5a3e0000), SPH_C32(0x57870000), SPH_C32(0x4c560000),
SPH_C32(0xea982435), SPH_C32(0x75b11115), SPH_C32(0x28b67247),
SPH_C32(0x2dd1f9ab) },
{ SPH_C32(0x29449c00), SPH_C32(0x64e70000), SPH_C32(0xf24b0000),
SPH_C32(0xc2f30000), SPH_C32(0x0ede4e8f), SPH_C32(0x56c23745),
SPH_C32(0xf3e04259), SPH_C32(0x8d0d9ec4), SPH_C32(0x466d0c00),
SPH_C32(0x08620000), SPH_C32(0xdd5d0000), SPH_C32(0xbadd0000),
SPH_C32(0x6a927942), SPH_C32(0x441f2b93), SPH_C32(0x218ace6f),
SPH_C32(0xbf2c0be2) },
{ SPH_C32(0x466d0c00), SPH_C32(0x08620000), SPH_C32(0xdd5d0000),
SPH_C32(0xbadd0000), SPH_C32(0x6a927942), SPH_C32(0x441f2b93),
SPH_C32(0x218ace6f), SPH_C32(0xbf2c0be2), SPH_C32(0x6f299000),
SPH_C32(0x6c850000), SPH_C32(0x2f160000), SPH_C32(0x782e0000),
SPH_C32(0x644c37cd), SPH_C32(0x12dd1cd6), SPH_C32(0xd26a8c36),
SPH_C32(0x32219526) },
{ SPH_C32(0xf6800005), SPH_C32(0x3443c000), SPH_C32(0x24070000),
SPH_C32(0x8f3d0000), SPH_C32(0x21373bfb), SPH_C32(0x0ab8d5ae),
SPH_C32(0xcdc58b19), SPH_C32(0xd795ba31), SPH_C32(0xa67f0001),
SPH_C32(0x71378000), SPH_C32(0x19fc0000), SPH_C32(0x96db0000),
SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3), SPH_C32(0x2c6d478f),
SPH_C32(0xac8e6c88) },
{ SPH_C32(0xa67f0001), SPH_C32(0x71378000), SPH_C32(0x19fc0000),
SPH_C32(0x96db0000), SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3),
SPH_C32(0x2c6d478f), SPH_C32(0xac8e6c88), SPH_C32(0x50ff0004),
SPH_C32(0x45744000), SPH_C32(0x3dfb0000), SPH_C32(0x19e60000),
SPH_C32(0x1bbc5606), SPH_C32(0xe1727b5d), SPH_C32(0xe1a8cc96),
SPH_C32(0x7b1bd6b9) },
{ SPH_C32(0xf7750009), SPH_C32(0xcf3cc000), SPH_C32(0xc3d60000),
SPH_C32(0x04920000), SPH_C32(0x029519a9), SPH_C32(0xf8e836ba),
SPH_C32(0x7a87f14e), SPH_C32(0x9e16981a), SPH_C32(0xd46a0000),
SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000), SPH_C32(0x4a290000),
SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c), SPH_C32(0x98369604),
SPH_C32(0xf746c320) },
{ SPH_C32(0xd46a0000), SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000),
SPH_C32(0x4a290000), SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c),
SPH_C32(0x98369604), SPH_C32(0xf746c320), SPH_C32(0x231f0009),
SPH_C32(0x42f40000), SPH_C32(0x66790000), SPH_C32(0x4ebb0000),
SPH_C32(0xfedb5bd3), SPH_C32(0x315cb0d6), SPH_C32(0xe2b1674a),
SPH_C32(0x69505b3a) },
{ SPH_C32(0x774400f0), SPH_C32(0xf15a0000), SPH_C32(0xf5b20000),
SPH_C32(0x34140000), SPH_C32(0x89377e8c), SPH_C32(0x5a8bec25),
SPH_C32(0x0bc3cd1e), SPH_C32(0xcf3775cb), SPH_C32(0xf46c0050),
SPH_C32(0x96180000), SPH_C32(0x14a50000), SPH_C32(0x031f0000),
SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19), SPH_C32(0x9ca470d2),
SPH_C32(0x8a341574) },
{ SPH_C32(0xf46c0050), SPH_C32(0x96180000), SPH_C32(0x14a50000),
SPH_C32(0x031f0000), SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19),
SPH_C32(0x9ca470d2), SPH_C32(0x8a341574), SPH_C32(0x832800a0),
SPH_C32(0x67420000), SPH_C32(0xe1170000), SPH_C32(0x370b0000),
SPH_C32(0xcba30034), SPH_C32(0x3c34923c), SPH_C32(0x9767bdcc),
SPH_C32(0x450360bf) },
{ SPH_C32(0xe8870170), SPH_C32(0x9d720000), SPH_C32(0x12db0000),
SPH_C32(0xd4220000), SPH_C32(0xf2886b27), SPH_C32(0xa921e543),
SPH_C32(0x4ef8b518), SPH_C32(0x618813b1), SPH_C32(0xb4370060),
SPH_C32(0x0c4c0000), SPH_C32(0x56c20000), SPH_C32(0x5cae0000),
SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825), SPH_C32(0x1b365f3d),
SPH_C32(0xf3d45758) },
{ SPH_C32(0xb4370060), SPH_C32(0x0c4c0000), SPH_C32(0x56c20000),
SPH_C32(0x5cae0000), SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825),
SPH_C32(0x1b365f3d), SPH_C32(0xf3d45758), SPH_C32(0x5cb00110),
SPH_C32(0x913e0000), SPH_C32(0x44190000), SPH_C32(0x888c0000),
SPH_C32(0x66dc7418), SPH_C32(0x921f1d66), SPH_C32(0x55ceea25),
SPH_C32(0x925c44e9) },
{ SPH_C32(0x0c720000), SPH_C32(0x49e50f00), SPH_C32(0x42790000),
SPH_C32(0x5cea0000), SPH_C32(0x33aa301a), SPH_C32(0x15822514),
SPH_C32(0x95a34b7b), SPH_C32(0xb44b0090), SPH_C32(0xfe220000),
SPH_C32(0xa7580500), SPH_C32(0x25d10000), SPH_C32(0xf7600000),
SPH_C32(0x893178da), SPH_C32(0x1fd4f860), SPH_C32(0x4ed0a315),
SPH_C32(0xa123ff9f) },
{ SPH_C32(0xfe220000), SPH_C32(0xa7580500), SPH_C32(0x25d10000),
SPH_C32(0xf7600000), SPH_C32(0x893178da), SPH_C32(0x1fd4f860),
SPH_C32(0x4ed0a315), SPH_C32(0xa123ff9f), SPH_C32(0xf2500000),
SPH_C32(0xeebd0a00), SPH_C32(0x67a80000), SPH_C32(0xab8a0000),
SPH_C32(0xba9b48c0), SPH_C32(0x0a56dd74), SPH_C32(0xdb73e86e),
SPH_C32(0x1568ff0f) },
{ SPH_C32(0x45180000), SPH_C32(0xa5b51700), SPH_C32(0xf96a0000),
SPH_C32(0x3b480000), SPH_C32(0x1ecc142c), SPH_C32(0x231395d6),
SPH_C32(0x16bca6b0), SPH_C32(0xdf33f4df), SPH_C32(0xb83d0000),
SPH_C32(0x16710600), SPH_C32(0x379a0000), SPH_C32(0xf5b10000),
SPH_C32(0x228161ac), SPH_C32(0xae48f145), SPH_C32(0x66241616),
SPH_C32(0xc5c1eb3e) },
{ SPH_C32(0xb83d0000), SPH_C32(0x16710600), SPH_C32(0x379a0000),
SPH_C32(0xf5b10000), SPH_C32(0x228161ac), SPH_C32(0xae48f145),
SPH_C32(0x66241616), SPH_C32(0xc5c1eb3e), SPH_C32(0xfd250000),
SPH_C32(0xb3c41100), SPH_C32(0xcef00000), SPH_C32(0xcef90000),
SPH_C32(0x3c4d7580), SPH_C32(0x8d5b6493), SPH_C32(0x7098b0a6),
SPH_C32(0x1af21fe1) },
{ SPH_C32(0x75a40000), SPH_C32(0xc28b2700), SPH_C32(0x94a40000),
SPH_C32(0x90f50000), SPH_C32(0xfb7857e0), SPH_C32(0x49ce0bae),
SPH_C32(0x1767c483), SPH_C32(0xaedf667e), SPH_C32(0xd1660000),
SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000), SPH_C32(0xf6940000),
SPH_C32(0x03024527), SPH_C32(0xcf70fcf2), SPH_C32(0xb4431b17),
SPH_C32(0x857f3c2b) },
{ SPH_C32(0xd1660000), SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000),
SPH_C32(0xf6940000), SPH_C32(0x03024527), SPH_C32(0xcf70fcf2),
SPH_C32(0xb4431b17), SPH_C32(0x857f3c2b), SPH_C32(0xa4c20000),
SPH_C32(0xd9372400), SPH_C32(0x0a480000), SPH_C32(0x66610000),
SPH_C32(0xf87a12c7), SPH_C32(0x86bef75c), SPH_C32(0xa324df94),
SPH_C32(0x2ba05a55) },
{ SPH_C32(0x75c90003), SPH_C32(0x0e10c000), SPH_C32(0xd1200000),
SPH_C32(0xbaea0000), SPH_C32(0x8bc42f3e), SPH_C32(0x8758b757),
SPH_C32(0xbb28761d), SPH_C32(0x00b72e2b), SPH_C32(0xeecf0001),
SPH_C32(0x6f564000), SPH_C32(0xf33e0000), SPH_C32(0xa79e0000),
SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5), SPH_C32(0x4a3b40ba),
SPH_C32(0xfeabf254) },
{ SPH_C32(0xeecf0001), SPH_C32(0x6f564000), SPH_C32(0xf33e0000),
SPH_C32(0xa79e0000), SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5),
SPH_C32(0x4a3b40ba), SPH_C32(0xfeabf254), SPH_C32(0x9b060002),
SPH_C32(0x61468000), SPH_C32(0x221e0000), SPH_C32(0x1d740000),
SPH_C32(0x36715d27), SPH_C32(0x30495c92), SPH_C32(0xf11336a7),
SPH_C32(0xfe1cdc7f) },
{ SPH_C32(0x86790000), SPH_C32(0x3f390002), SPH_C32(0xe19ae000),
SPH_C32(0x98560000), SPH_C32(0x9565670e), SPH_C32(0x4e88c8ea),
SPH_C32(0xd3dd4944), SPH_C32(0x161ddab9), SPH_C32(0x30b70000),
SPH_C32(0xe5d00000), SPH_C32(0xf4f46000), SPH_C32(0x42c40000),
SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460), SPH_C32(0x21afa1ea),
SPH_C32(0xb0a51834) },
{ SPH_C32(0x30b70000), SPH_C32(0xe5d00000), SPH_C32(0xf4f46000),
SPH_C32(0x42c40000), SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460),
SPH_C32(0x21afa1ea), SPH_C32(0xb0a51834), SPH_C32(0xb6ce0000),
SPH_C32(0xdae90002), SPH_C32(0x156e8000), SPH_C32(0xda920000),
SPH_C32(0xf6dd5a64), SPH_C32(0x36325c8a), SPH_C32(0xf272e8ae),
SPH_C32(0xa6b8c28d) },
{ SPH_C32(0x14190000), SPH_C32(0x23ca003c), SPH_C32(0x50df0000),
SPH_C32(0x44b60000), SPH_C32(0x1b6c67b0), SPH_C32(0x3cf3ac75),
SPH_C32(0x61e610b0), SPH_C32(0xdbcadb80), SPH_C32(0xe3430000),
SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000), SPH_C32(0xaa4e0000),
SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15), SPH_C32(0x123db156),
SPH_C32(0x3a4e99d7) },
{ SPH_C32(0xe3430000), SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000),
SPH_C32(0xaa4e0000), SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15),
SPH_C32(0x123db156), SPH_C32(0x3a4e99d7), SPH_C32(0xf75a0000),
SPH_C32(0x19840028), SPH_C32(0xa2190000), SPH_C32(0xeef80000),
SPH_C32(0xc0722516), SPH_C32(0x19981260), SPH_C32(0x73dba1e6),
SPH_C32(0xe1844257) },
{ SPH_C32(0x54500000), SPH_C32(0x0671005c), SPH_C32(0x25ae0000),
SPH_C32(0x6a1e0000), SPH_C32(0x2ea54edf), SPH_C32(0x664e8512),
SPH_C32(0xbfba18c3), SPH_C32(0x7e715d17), SPH_C32(0xbc8d0000),
SPH_C32(0xfc3b0018), SPH_C32(0x19830000), SPH_C32(0xd10b0000),
SPH_C32(0xae1878c4), SPH_C32(0x42a69856), SPH_C32(0x0012da37),
SPH_C32(0x2c3b504e) },
{ SPH_C32(0xbc8d0000), SPH_C32(0xfc3b0018), SPH_C32(0x19830000),
SPH_C32(0xd10b0000), SPH_C32(0xae1878c4), SPH_C32(0x42a69856),
SPH_C32(0x0012da37), SPH_C32(0x2c3b504e), SPH_C32(0xe8dd0000),
SPH_C32(0xfa4a0044), SPH_C32(0x3c2d0000), SPH_C32(0xbb150000),
SPH_C32(0x80bd361b), SPH_C32(0x24e81d44), SPH_C32(0xbfa8c2f4),
SPH_C32(0x524a0d59) },
{ SPH_C32(0x69510000), SPH_C32(0xd4e1009c), SPH_C32(0xc3230000),
SPH_C32(0xac2f0000), SPH_C32(0xe4950bae), SPH_C32(0xcea415dc),
SPH_C32(0x87ec287c), SPH_C32(0xbce1a3ce), SPH_C32(0xc6730000),
SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000), SPH_C32(0x218d0000),
SPH_C32(0x23111587), SPH_C32(0x7913512f), SPH_C32(0x1d28ac88),
SPH_C32(0x378dd173) },
{ SPH_C32(0xc6730000), SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000),
SPH_C32(0x218d0000), SPH_C32(0x23111587), SPH_C32(0x7913512f),
SPH_C32(0x1d28ac88), SPH_C32(0x378dd173), SPH_C32(0xaf220000),
SPH_C32(0x7b6c0090), SPH_C32(0x67e20000), SPH_C32(0x8da20000),
SPH_C32(0xc7841e29), SPH_C32(0xb7b744f3), SPH_C32(0x9ac484f4),
SPH_C32(0x8b6c72bd) },
{ SPH_C32(0xcc140000), SPH_C32(0xa5630000), SPH_C32(0x5ab90780),
SPH_C32(0x3b500000), SPH_C32(0x4bd013ff), SPH_C32(0x879b3418),
SPH_C32(0x694348c1), SPH_C32(0xca5a87fe), SPH_C32(0x819e0000),
SPH_C32(0xec570000), SPH_C32(0x66320280), SPH_C32(0x95f30000),
SPH_C32(0x5da92802), SPH_C32(0x48f43cbc), SPH_C32(0xe65aa22d),
SPH_C32(0x8e67b7fa) },
{ SPH_C32(0x819e0000), SPH_C32(0xec570000), SPH_C32(0x66320280),
SPH_C32(0x95f30000), SPH_C32(0x5da92802), SPH_C32(0x48f43cbc),
SPH_C32(0xe65aa22d), SPH_C32(0x8e67b7fa), SPH_C32(0x4d8a0000),
SPH_C32(0x49340000), SPH_C32(0x3c8b0500), SPH_C32(0xaea30000),
SPH_C32(0x16793bfd), SPH_C32(0xcf6f08a4), SPH_C32(0x8f19eaec),
SPH_C32(0x443d3004) },
{ SPH_C32(0x78230000), SPH_C32(0x12fc0000), SPH_C32(0xa93a0b80),
SPH_C32(0x90a50000), SPH_C32(0x713e2879), SPH_C32(0x7ee98924),
SPH_C32(0xf08ca062), SPH_C32(0x636f8bab), SPH_C32(0x02af0000),
SPH_C32(0xb7280000), SPH_C32(0xba1c0300), SPH_C32(0x56980000),
SPH_C32(0xba8d45d3), SPH_C32(0x8048c667), SPH_C32(0xa95c149a),
SPH_C32(0xf4f6ea7b) },
{ SPH_C32(0x02af0000), SPH_C32(0xb7280000), SPH_C32(0xba1c0300),
SPH_C32(0x56980000), SPH_C32(0xba8d45d3), SPH_C32(0x8048c667),
SPH_C32(0xa95c149a), SPH_C32(0xf4f6ea7b), SPH_C32(0x7a8c0000),
SPH_C32(0xa5d40000), SPH_C32(0x13260880), SPH_C32(0xc63d0000),
SPH_C32(0xcbb36daa), SPH_C32(0xfea14f43), SPH_C32(0x59d0b4f8),
SPH_C32(0x979961d0) },
{ SPH_C32(0xac480000), SPH_C32(0x1ba60000), SPH_C32(0x45fb1380),
SPH_C32(0x03430000), SPH_C32(0x5a85316a), SPH_C32(0x1fb250b6),
SPH_C32(0xfe72c7fe), SPH_C32(0x91e478f6), SPH_C32(0x1e4e0000),
SPH_C32(0xdecf0000), SPH_C32(0x6df80180), SPH_C32(0x77240000),
SPH_C32(0xec47079e), SPH_C32(0xf4a0694e), SPH_C32(0xcda31812),
SPH_C32(0x98aa496e) },
{ SPH_C32(0x1e4e0000), SPH_C32(0xdecf0000), SPH_C32(0x6df80180),
SPH_C32(0x77240000), SPH_C32(0xec47079e), SPH_C32(0xf4a0694e),
SPH_C32(0xcda31812), SPH_C32(0x98aa496e), SPH_C32(0xb2060000),
SPH_C32(0xc5690000), SPH_C32(0x28031200), SPH_C32(0x74670000),
SPH_C32(0xb6c236f4), SPH_C32(0xeb1239f8), SPH_C32(0x33d1dfec),
SPH_C32(0x094e3198) },
{ SPH_C32(0xaec30000), SPH_C32(0x9c4f0001), SPH_C32(0x79d1e000),
SPH_C32(0x2c150000), SPH_C32(0x45cc75b3), SPH_C32(0x6650b736),
SPH_C32(0xab92f78f), SPH_C32(0xa312567b), SPH_C32(0xdb250000),
SPH_C32(0x09290000), SPH_C32(0x49aac000), SPH_C32(0x81e10000),
SPH_C32(0xcafe6b59), SPH_C32(0x42793431), SPH_C32(0x43566b76),
SPH_C32(0xe86cba2e) },
{ SPH_C32(0xdb250000), SPH_C32(0x09290000), SPH_C32(0x49aac000),
SPH_C32(0x81e10000), SPH_C32(0xcafe6b59), SPH_C32(0x42793431),
SPH_C32(0x43566b76), SPH_C32(0xe86cba2e), SPH_C32(0x75e60000),
SPH_C32(0x95660001), SPH_C32(0x307b2000), SPH_C32(0xadf40000),
SPH_C32(0x8f321eea), SPH_C32(0x24298307), SPH_C32(0xe8c49cf9),
SPH_C32(0x4b7eec55) },
{ SPH_C32(0x58430000), SPH_C32(0x807e0000), SPH_C32(0x78330001),
SPH_C32(0xc66b3800), SPH_C32(0xe7375cdc), SPH_C32(0x79ad3fdd),
SPH_C32(0xac73fe6f), SPH_C32(0x3a4479b1), SPH_C32(0x1d5a0000),
SPH_C32(0x2b720000), SPH_C32(0x488d0000), SPH_C32(0xaf611800),
SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0), SPH_C32(0x81a20429),
SPH_C32(0x1e7536a6) },
{ SPH_C32(0x1d5a0000), SPH_C32(0x2b720000), SPH_C32(0x488d0000),
SPH_C32(0xaf611800), SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0),
SPH_C32(0x81a20429), SPH_C32(0x1e7536a6), SPH_C32(0x45190000),
SPH_C32(0xab0c0000), SPH_C32(0x30be0001), SPH_C32(0x690a2000),
SPH_C32(0xc2fc7219), SPH_C32(0xb1d4800d), SPH_C32(0x2dd1fa46),
SPH_C32(0x24314f17) },
{ SPH_C32(0xa53b0000), SPH_C32(0x14260000), SPH_C32(0x4e30001e),
SPH_C32(0x7cae0000), SPH_C32(0x8f9e0dd5), SPH_C32(0x78dfaa3d),
SPH_C32(0xf73168d8), SPH_C32(0x0b1b4946), SPH_C32(0x07ed0000),
SPH_C32(0xb2500000), SPH_C32(0x8774000a), SPH_C32(0x970d0000),
SPH_C32(0x437223ae), SPH_C32(0x48c76ea4), SPH_C32(0xf4786222),
SPH_C32(0x9075b1ce) },
{ SPH_C32(0x07ed0000), SPH_C32(0xb2500000), SPH_C32(0x8774000a),
SPH_C32(0x970d0000), SPH_C32(0x437223ae), SPH_C32(0x48c76ea4),
SPH_C32(0xf4786222), SPH_C32(0x9075b1ce), SPH_C32(0xa2d60000),
SPH_C32(0xa6760000), SPH_C32(0xc9440014), SPH_C32(0xeba30000),
SPH_C32(0xccec2e7b), SPH_C32(0x3018c499), SPH_C32(0x03490afa),
SPH_C32(0x9b6ef888) },
{ SPH_C32(0x88980000), SPH_C32(0x1f940000), SPH_C32(0x7fcf002e),
SPH_C32(0xfb4e0000), SPH_C32(0xf158079a), SPH_C32(0x61ae9167),
SPH_C32(0xa895706c), SPH_C32(0xe6107494), SPH_C32(0x0bc20000),
SPH_C32(0xdb630000), SPH_C32(0x7e88000c), SPH_C32(0x15860000),
SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43), SPH_C32(0xf460449e),
SPH_C32(0xd8b61463) },
{ SPH_C32(0x0bc20000), SPH_C32(0xdb630000), SPH_C32(0x7e88000c),
SPH_C32(0x15860000), SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43),
SPH_C32(0xf460449e), SPH_C32(0xd8b61463), SPH_C32(0x835a0000),
SPH_C32(0xc4f70000), SPH_C32(0x01470022), SPH_C32(0xeec80000),
SPH_C32(0x60a54f69), SPH_C32(0x142f2a24), SPH_C32(0x5cf534f2),
SPH_C32(0x3ea660f7) },
{ SPH_C32(0x52500000), SPH_C32(0x29540000), SPH_C32(0x6a61004e),
SPH_C32(0xf0ff0000), SPH_C32(0x9a317eec), SPH_C32(0x452341ce),
SPH_C32(0xcf568fe5), SPH_C32(0x5303130f), SPH_C32(0x538d0000),
SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006), SPH_C32(0x56ff0000),
SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9), SPH_C32(0xa9444018),
SPH_C32(0x7f975691) },
{ SPH_C32(0x538d0000), SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006),
SPH_C32(0x56ff0000), SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9),
SPH_C32(0xa9444018), SPH_C32(0x7f975691), SPH_C32(0x01dd0000),
SPH_C32(0x80a80000), SPH_C32(0xf4960048), SPH_C32(0xa6000000),
SPH_C32(0x90d57ea2), SPH_C32(0xd7e68c37), SPH_C32(0x6612cffd),
SPH_C32(0x2c94459e) },
{ SPH_C32(0xe6280000), SPH_C32(0x4c4b0000), SPH_C32(0xa8550000),
SPH_C32(0xd3d002e0), SPH_C32(0xd86130b8), SPH_C32(0x98a7b0da),
SPH_C32(0x289506b4), SPH_C32(0xd75a4897), SPH_C32(0xf0c50000),
SPH_C32(0x59230000), SPH_C32(0x45820000), SPH_C32(0xe18d00c0),
SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699), SPH_C32(0xcbe0fe1c),
SPH_C32(0x56a7b19f) },
{ SPH_C32(0xf0c50000), SPH_C32(0x59230000), SPH_C32(0x45820000),
SPH_C32(0xe18d00c0), SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699),
SPH_C32(0xcbe0fe1c), SPH_C32(0x56a7b19f), SPH_C32(0x16ed0000),
SPH_C32(0x15680000), SPH_C32(0xedd70000), SPH_C32(0x325d0220),
SPH_C32(0xe30c3689), SPH_C32(0x5a4ae643), SPH_C32(0xe375f8a8),
SPH_C32(0x81fdf908) },
{ SPH_C32(0xb4310000), SPH_C32(0x77330000), SPH_C32(0xb15d0000),
SPH_C32(0x7fd004e0), SPH_C32(0x78a26138), SPH_C32(0xd116c35d),
SPH_C32(0xd256d489), SPH_C32(0x4e6f74de), SPH_C32(0xe3060000),
SPH_C32(0xbdc10000), SPH_C32(0x87130000), SPH_C32(0xbff20060),
SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751), SPH_C32(0x73c5ab06),
SPH_C32(0x5bd61539) },
{ SPH_C32(0xe3060000), SPH_C32(0xbdc10000), SPH_C32(0x87130000),
SPH_C32(0xbff20060), SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751),
SPH_C32(0x73c5ab06), SPH_C32(0x5bd61539), SPH_C32(0x57370000),
SPH_C32(0xcaf20000), SPH_C32(0x364e0000), SPH_C32(0xc0220480),
SPH_C32(0x56186b22), SPH_C32(0x5ca3f40c), SPH_C32(0xa1937f8f),
SPH_C32(0x15b961e7) },
{ SPH_C32(0x02f20000), SPH_C32(0xa2810000), SPH_C32(0x873f0000),
SPH_C32(0xe36c7800), SPH_C32(0x1e1d74ef), SPH_C32(0x073d2bd6),
SPH_C32(0xc4c23237), SPH_C32(0x7f32259e), SPH_C32(0xbadd0000),
SPH_C32(0x13ad0000), SPH_C32(0xb7e70000), SPH_C32(0xf7282800),
SPH_C32(0xdf45144d), SPH_C32(0x361ac33a), SPH_C32(0xea5a8d14),
SPH_C32(0x2a2c18f0) },
{ SPH_C32(0xbadd0000), SPH_C32(0x13ad0000), SPH_C32(0xb7e70000),
SPH_C32(0xf7282800), SPH_C32(0xdf45144d), SPH_C32(0x361ac33a),
SPH_C32(0xea5a8d14), SPH_C32(0x2a2c18f0), SPH_C32(0xb82f0000),
SPH_C32(0xb12c0000), SPH_C32(0x30d80000), SPH_C32(0x14445000),
SPH_C32(0xc15860a2), SPH_C32(0x3127e8ec), SPH_C32(0x2e98bf23),
SPH_C32(0x551e3d6e) },
{ SPH_C32(0x1e6c0000), SPH_C32(0xc4420000), SPH_C32(0x8a2e0000),
SPH_C32(0xbcb6b800), SPH_C32(0x2c4413b6), SPH_C32(0x8bfdd3da),
SPH_C32(0x6a0c1bc8), SPH_C32(0xb99dc2eb), SPH_C32(0x92560000),
SPH_C32(0x1eda0000), SPH_C32(0xea510000), SPH_C32(0xe8b13000),
SPH_C32(0xa93556a5), SPH_C32(0xebfb6199), SPH_C32(0xb15c2254),
SPH_C32(0x33c5244f) },
{ SPH_C32(0x92560000), SPH_C32(0x1eda0000), SPH_C32(0xea510000),
SPH_C32(0xe8b13000), SPH_C32(0xa93556a5), SPH_C32(0xebfb6199),
SPH_C32(0xb15c2254), SPH_C32(0x33c5244f), SPH_C32(0x8c3a0000),
SPH_C32(0xda980000), SPH_C32(0x607f0000), SPH_C32(0x54078800),
SPH_C32(0x85714513), SPH_C32(0x6006b243), SPH_C32(0xdb50399c),
SPH_C32(0x8a58e6a4) },
{ SPH_C32(0x033d0000), SPH_C32(0x08b30000), SPH_C32(0xf33a0000),
SPH_C32(0x3ac20007), SPH_C32(0x51298a50), SPH_C32(0x6b6e661f),
SPH_C32(0x0ea5cfe3), SPH_C32(0xe6da7ffe), SPH_C32(0xa8da0000),
SPH_C32(0x96be0000), SPH_C32(0x5c1d0000), SPH_C32(0x07da0002),
SPH_C32(0x7d669583), SPH_C32(0x1f98708a), SPH_C32(0xbb668808),
SPH_C32(0xda878000) },
{ SPH_C32(0xa8da0000), SPH_C32(0x96be0000), SPH_C32(0x5c1d0000),
SPH_C32(0x07da0002), SPH_C32(0x7d669583), SPH_C32(0x1f98708a),
SPH_C32(0xbb668808), SPH_C32(0xda878000), SPH_C32(0xabe70000),
SPH_C32(0x9e0d0000), SPH_C32(0xaf270000), SPH_C32(0x3d180005),
SPH_C32(0x2c4f1fd3), SPH_C32(0x74f61695), SPH_C32(0xb5c347eb),
SPH_C32(0x3c5dfffe) },
{ SPH_C32(0x01930000), SPH_C32(0xe7820000), SPH_C32(0xedfb0000),
SPH_C32(0xcf0c000b), SPH_C32(0x8dd08d58), SPH_C32(0xbca3b42e),
SPH_C32(0x063661e1), SPH_C32(0x536f9e7b), SPH_C32(0x92280000),
SPH_C32(0xdc850000), SPH_C32(0x57fa0000), SPH_C32(0x56dc0003),
SPH_C32(0xbae92316), SPH_C32(0x5aefa30c), SPH_C32(0x90cef752),
SPH_C32(0x7b1675d7) },
{ SPH_C32(0x92280000), SPH_C32(0xdc850000), SPH_C32(0x57fa0000),
SPH_C32(0x56dc0003), SPH_C32(0xbae92316), SPH_C32(0x5aefa30c),
SPH_C32(0x90cef752), SPH_C32(0x7b1675d7), SPH_C32(0x93bb0000),
SPH_C32(0x3b070000), SPH_C32(0xba010000), SPH_C32(0x99d00008),
SPH_C32(0x3739ae4e), SPH_C32(0xe64c1722), SPH_C32(0x96f896b3),
SPH_C32(0x2879ebac) },
{ SPH_C32(0x5fa80000), SPH_C32(0x56030000), SPH_C32(0x43ae0000),
SPH_C32(0x64f30013), SPH_C32(0x257e86bf), SPH_C32(0x1311944e),
SPH_C32(0x541e95bf), SPH_C32(0x8ea4db69), SPH_C32(0x00440000),
SPH_C32(0x7f480000), SPH_C32(0xda7c0000), SPH_C32(0x2a230001),
SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87), SPH_C32(0x030a9e60),
SPH_C32(0xbe0a679e) },
{ SPH_C32(0x00440000), SPH_C32(0x7f480000), SPH_C32(0xda7c0000),
SPH_C32(0x2a230001), SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87),
SPH_C32(0x030a9e60), SPH_C32(0xbe0a679e), SPH_C32(0x5fec0000),
SPH_C32(0x294b0000), SPH_C32(0x99d20000), SPH_C32(0x4ed00012),
SPH_C32(0x1ed34f73), SPH_C32(0xbaa708c9), SPH_C32(0x57140bdf),
SPH_C32(0x30aebcf7) },
{ SPH_C32(0xee930000), SPH_C32(0xd6070000), SPH_C32(0x92c10000),
SPH_C32(0x2b9801e0), SPH_C32(0x9451287c), SPH_C32(0x3b6cfb57),
SPH_C32(0x45312374), SPH_C32(0x201f6a64), SPH_C32(0x7b280000),
SPH_C32(0x57420000), SPH_C32(0xa9e50000), SPH_C32(0x634300a0),
SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb), SPH_C32(0x27f83b03),
SPH_C32(0xc7ff60f0) },
{ SPH_C32(0x7b280000), SPH_C32(0x57420000), SPH_C32(0xa9e50000),
SPH_C32(0x634300a0), SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb),
SPH_C32(0x27f83b03), SPH_C32(0xc7ff60f0), SPH_C32(0x95bb0000),
SPH_C32(0x81450000), SPH_C32(0x3b240000), SPH_C32(0x48db0140),
SPH_C32(0x0a8a6c53), SPH_C32(0x56f56eec), SPH_C32(0x62c91877),
SPH_C32(0xe7e00a94) }
};
#define INPUT_BIG \
do { \
__m256i db = *buf; \
const sph_u32 *tp = &T512[0][0]; \
m0 = m256_zero; \
m1 = m256_zero; \
m2 = m256_zero; \
m3 = m256_zero; \
m4 = m256_zero; \
m5 = m256_zero; \
m6 = m256_zero; \
m7 = m256_zero; \
for ( int u = 0; u < 64; u++ ) \
{ \
__m256i dm = _mm256_and_si256( db, m256_one_64 ) ; \
dm = mm256_negate_32( _mm256_or_si256( dm, \
_mm256_slli_epi64( dm, 32 ) ) ); \
m0 = _mm256_xor_si256( m0, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0x1], tp[0x0], tp[0x1], tp[0x0], \
tp[0x1], tp[0x0], tp[0x1], tp[0x0] ) ) ); \
m1 = _mm256_xor_si256( m1, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0x3], tp[0x2], tp[0x3], tp[0x2], \
tp[0x3], tp[0x2], tp[0x3], tp[0x2] ) ) ); \
m2 = _mm256_xor_si256( m2, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0x5], tp[0x4], tp[0x5], tp[0x4], \
tp[0x5], tp[0x4], tp[0x5], tp[0x4] ) ) ); \
m3 = _mm256_xor_si256( m3, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0x7], tp[0x6], tp[0x7], tp[0x6], \
tp[0x7], tp[0x6], tp[0x7], tp[0x6] ) ) ); \
m4 = _mm256_xor_si256( m4, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0x9], tp[0x8], tp[0x9], tp[0x8], \
tp[0x9], tp[0x8], tp[0x9], tp[0x8] ) ) ); \
m5 = _mm256_xor_si256( m5, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0xB], tp[0xA], tp[0xB], tp[0xA], \
tp[0xB], tp[0xA], tp[0xB], tp[0xA] ) ) ); \
m6 = _mm256_xor_si256( m6, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0xD], tp[0xC], tp[0xD], tp[0xC], \
tp[0xD], tp[0xC], tp[0xD], tp[0xC] ) ) ); \
m7 = _mm256_xor_si256( m7, _mm256_and_si256( dm, \
_mm256_set_epi32( tp[0xF], tp[0xE], tp[0xF], tp[0xE], \
tp[0xF], tp[0xE], tp[0xF], tp[0xE] ) ) ); \
tp += 0x10; \
db = _mm256_srli_epi64( db, 1 ); \
} \
} while (0)
#define SBOX( a, b, c, d ) \
do { \
__m256i t; \
t = a; \
a = _mm256_and_si256( a, c ); \
a = _mm256_xor_si256( a, d ); \
c = _mm256_xor_si256( c, b ); \
c = _mm256_xor_si256( c, a ); \
d = _mm256_or_si256( d, t ); \
d = _mm256_xor_si256( d, b ); \
t = _mm256_xor_si256( t, c ); \
b = d; \
d = _mm256_or_si256( d, t ); \
d = _mm256_xor_si256( d, a ); \
a = _mm256_and_si256( a, b ); \
t = _mm256_xor_si256( t, a ); \
b = _mm256_xor_si256( b, d ); \
b = _mm256_xor_si256( b, t ); \
a = c; \
c = b; \
b = d; \
d = mm256_not( t ); \
} while (0)
#define L( a, b, c, d ) \
do { \
a = mm256_rotl_32( a, 13 ); \
c = mm256_rotl_32( c, 3 ); \
b = _mm256_xor_si256( b, _mm256_xor_si256( a, c ) ); \
d = _mm256_xor_si256( d, _mm256_xor_si256( c, \
_mm256_slli_epi32( a, 3 ) ) ); \
b = mm256_rotl_32( b, 1 ); \
d = mm256_rotl_32( d, 7 ); \
a = _mm256_xor_si256( a, _mm256_xor_si256( b, d ) ); \
c = _mm256_xor_si256( c, _mm256_xor_si256( d, \
_mm256_slli_epi32( b, 7 ) ) ); \
a = mm256_rotl_32( a, 5 ); \
c = mm256_rotl_32( c, 22 ); \
} while (0)
#define DECL_STATE_BIG \
__m256i c0, c1, c2, c3, c4, c5, c6, c7; \
#define READ_STATE_BIG(sc) \
do { \
c0 = sc->h[0x0]; \
c1 = sc->h[0x1]; \
c2 = sc->h[0x2]; \
c3 = sc->h[0x3]; \
c4 = sc->h[0x4]; \
c5 = sc->h[0x5]; \
c6 = sc->h[0x6]; \
c7 = sc->h[0x7]; \
} while (0)
#define WRITE_STATE_BIG(sc) \
do { \
sc->h[0x0] = c0; \
sc->h[0x1] = c1; \
sc->h[0x2] = c2; \
sc->h[0x3] = c3; \
sc->h[0x4] = c4; \
sc->h[0x5] = c5; \
sc->h[0x6] = c6; \
sc->h[0x7] = c7; \
} while (0)
#define s0 m0
#define s1 c0
#define s2 m1
#define s3 c1
#define s4 c2
#define s5 m2
#define s6 c3
#define s7 m3
#define s8 m4
#define s9 c4
#define sA m5
#define sB c5
#define sC c6
#define sD m6
#define sE c7
#define sF m7
#define ROUND_BIG(rc, alpha) \
do { \
__m256i t0, t1, t2, t3; \
s0 = _mm256_xor_si256( s0, _mm256_set_epi32( \
alpha[0x01] ^ (rc), alpha[0x00], alpha[0x01] ^ (rc), alpha[0x00], \
alpha[0x01] ^ (rc), alpha[0x00], alpha[0x01] ^ (rc), alpha[0x00] ) ); \
s1 = _mm256_xor_si256( s1, _mm256_set_epi32( \
alpha[0x03], alpha[0x02], alpha[0x03], alpha[0x02], \
alpha[0x03], alpha[0x02], alpha[0x03], alpha[0x02] ) ); \
s2 = _mm256_xor_si256( s2, _mm256_set_epi32( \
alpha[0x05], alpha[0x04], alpha[0x05], alpha[0x04], \
alpha[0x05], alpha[0x04], alpha[0x05], alpha[0x04] ) ); \
s3 = _mm256_xor_si256( s3, _mm256_set_epi32( \
alpha[0x07], alpha[0x06], alpha[0x07], alpha[0x06], \
alpha[0x07], alpha[0x06], alpha[0x07], alpha[0x06] ) ); \
s4 = _mm256_xor_si256( s4, _mm256_set_epi32( \
alpha[0x09], alpha[0x08], alpha[0x09], alpha[0x08], \
alpha[0x09], alpha[0x08], alpha[0x09], alpha[0x08] ) ); \
s5 = _mm256_xor_si256( s5, _mm256_set_epi32( \
alpha[0x0B], alpha[0x0A], alpha[0x0B], alpha[0x0A], \
alpha[0x0B], alpha[0x0A], alpha[0x0B], alpha[0x0A] ) ); \
s6 = _mm256_xor_si256( s6, _mm256_set_epi32( \
alpha[0x0D], alpha[0x0C], alpha[0x0D], alpha[0x0C], \
alpha[0x0D], alpha[0x0C], alpha[0x0D], alpha[0x0C] ) ); \
s7 = _mm256_xor_si256( s7, _mm256_set_epi32( \
alpha[0x0F], alpha[0x0E], alpha[0x0F], alpha[0x0E], \
alpha[0x0F], alpha[0x0E], alpha[0x0F], alpha[0x0E] ) ); \
s8 = _mm256_xor_si256( s8, _mm256_set_epi32( \
alpha[0x11], alpha[0x10], alpha[0x11], alpha[0x10], \
alpha[0x11], alpha[0x10], alpha[0x11], alpha[0x10] ) ); \
s9 = _mm256_xor_si256( s9, _mm256_set_epi32( \
alpha[0x13], alpha[0x12], alpha[0x13], alpha[0x12], \
alpha[0x13], alpha[0x12], alpha[0x13], alpha[0x12] ) ); \
sA = _mm256_xor_si256( sA, _mm256_set_epi32( \
alpha[0x15], alpha[0x14], alpha[0x15], alpha[0x14], \
alpha[0x15], alpha[0x14], alpha[0x15], alpha[0x14] ) ); \
sB = _mm256_xor_si256( sB, _mm256_set_epi32( \
alpha[0x17], alpha[0x16], alpha[0x17], alpha[0x16], \
alpha[0x17], alpha[0x16], alpha[0x17], alpha[0x16] ) ); \
sC = _mm256_xor_si256( sC, _mm256_set_epi32( \
alpha[0x19], alpha[0x18], alpha[0x19], alpha[0x18], \
alpha[0x19], alpha[0x18], alpha[0x19], alpha[0x18] ) ); \
sD = _mm256_xor_si256( sD, _mm256_set_epi32( \
alpha[0x1B], alpha[0x1A], alpha[0x1B], alpha[0x1A], \
alpha[0x1B], alpha[0x1A], alpha[0x1B], alpha[0x1A] ) ); \
sE = _mm256_xor_si256( sE, _mm256_set_epi32( \
alpha[0x1D], alpha[0x1C], alpha[0x1D], alpha[0x1C], \
alpha[0x1D], alpha[0x1C], alpha[0x1D], alpha[0x1C] ) ); \
sF = _mm256_xor_si256( sF, _mm256_set_epi32( \
alpha[0x1F], alpha[0x1E], alpha[0x1F], alpha[0x1E], \
alpha[0x1F], alpha[0x1E], alpha[0x1F], alpha[0x1E] ) ); \
\
SBOX( s0, s4, s8, sC ); \
SBOX( s1, s5, s9, sD ); \
SBOX( s2, s6, sA, sE ); \
SBOX( s3, s7, sB, sF ); \
\
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s4, 4 ), \
_mm256_bslli_epi128( s5, 4 ), 0xAA ); \
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sD, 4 ), \
_mm256_bslli_epi128( sE, 4 ), 0xAA ); \
L( s0, t1, s9, t3 ); \
s4 = _mm256_blend_epi32( s4, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
s5 = _mm256_blend_epi32( s5, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
sD = _mm256_blend_epi32( sD, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
sE = _mm256_blend_epi32( sE, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
\
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s5, 4 ), \
_mm256_bslli_epi128( s6, 4 ), 0xAA ); \
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sE, 4 ), \
_mm256_bslli_epi128( sF, 4 ), 0xAA ); \
L( s1, t1, sA, t3 ); \
s5 = _mm256_blend_epi32( s5, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
s6 = _mm256_blend_epi32( s6, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
sE = _mm256_blend_epi32( sE, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
sF = _mm256_blend_epi32( sF, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
\
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s6, 4 ), \
_mm256_bslli_epi128( s7, 4 ), 0xAA ); \
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sF, 4 ), \
_mm256_bslli_epi128( sC, 4 ), 0xAA ); \
L( s2, t1, sB, t3 ); \
s6 = _mm256_blend_epi32( s6, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
s7 = _mm256_blend_epi32( s7, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
sF = _mm256_blend_epi32( sF, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
sC = _mm256_blend_epi32( sC, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
\
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s7, 4 ), \
_mm256_bslli_epi128( s4, 4 ), 0xAA ); \
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sC, 4 ), \
_mm256_bslli_epi128( sD, 4 ), 0xAA ); \
L( s3, t1, s8, t3 ); \
s7 = _mm256_blend_epi32( s7, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
s4 = _mm256_blend_epi32( s4, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
sC = _mm256_blend_epi32( sC, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
sD = _mm256_blend_epi32( sD, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
\
t0 = _mm256_blend_epi32( s0, _mm256_bslli_epi128( s8, 4 ), 0xAA ); \
t1 = _mm256_blend_epi32( s1, s9, 0xAA ); \
t2 = _mm256_blend_epi32( _mm256_bsrli_epi128( s2, 4 ), sA, 0xAA ); \
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( s3, 4 ), \
_mm256_bslli_epi128( sB, 4 ), 0xAA ); \
L( t0, t1, t2, t3 ); \
s0 = _mm256_blend_epi32( s0, t0, 0x55 ); \
s8 = _mm256_blend_epi32( s8, _mm256_bsrli_epi128( t0, 4 ), 0x55 ); \
s1 = _mm256_blend_epi32( s1, t1, 0x55 ); \
s9 = _mm256_blend_epi32( s9, t1, 0xAA ); \
s2 = _mm256_blend_epi32( s2, _mm256_bslli_epi128( t2, 4 ), 0xAA ); \
sA = _mm256_blend_epi32( sA, t2, 0xAA ); \
s3 = _mm256_blend_epi32( s3, _mm256_bslli_epi128( t3, 4 ), 0xAA ); \
sB = _mm256_blend_epi32( sB, _mm256_bsrli_epi128( t3, 4 ), 0x55 ); \
\
t0 = _mm256_blend_epi32( _mm256_bsrli_epi128( s4, 4 ), sC, 0xAA ); \
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s5, 4 ), \
_mm256_bslli_epi128( sD, 4 ), 0xAA ); \
t2 = _mm256_blend_epi32( s6, _mm256_bslli_epi128( sE, 4 ), 0xAA ); \
t3 = _mm256_blend_epi32( s7, sF, 0xAA ); \
L( t0, t1, t2, t3 ); \
s4 = _mm256_blend_epi32( s4, _mm256_bslli_epi128( t0, 4 ), 0xAA ); \
sC = _mm256_blend_epi32( sC, t0, 0xAA ); \
s5 = _mm256_blend_epi32( s5, _mm256_bslli_epi128( t1, 4 ), 0xAA ); \
sD = _mm256_blend_epi32( sD, _mm256_bsrli_epi128( t1, 4 ), 0x55 ); \
s6 = _mm256_blend_epi32( s6, t2, 0x55 ); \
sE = _mm256_blend_epi32( sE, _mm256_bsrli_epi128( t2, 4 ), 0x55 ); \
s7 = _mm256_blend_epi32( s7, t3, 0x55 ); \
sF = _mm256_blend_epi32( sF, t3, 0xAA ); \
} while (0)
#define P_BIG \
do { \
ROUND_BIG(0, alpha_n); \
ROUND_BIG(1, alpha_n); \
ROUND_BIG(2, alpha_n); \
ROUND_BIG(3, alpha_n); \
ROUND_BIG(4, alpha_n); \
ROUND_BIG(5, alpha_n); \
} while (0)
#define PF_BIG \
do { \
ROUND_BIG( 0, alpha_f); \
ROUND_BIG( 1, alpha_f); \
ROUND_BIG( 2, alpha_f); \
ROUND_BIG( 3, alpha_f); \
ROUND_BIG( 4, alpha_f); \
ROUND_BIG( 5, alpha_f); \
ROUND_BIG( 6, alpha_f); \
ROUND_BIG( 7, alpha_f); \
ROUND_BIG( 8, alpha_f); \
ROUND_BIG( 9, alpha_f); \
ROUND_BIG(10, alpha_f); \
ROUND_BIG(11, alpha_f); \
} while (0)
#define T_BIG \
do { /* order is important */ \
c7 = sc->h[ 0x7 ] = _mm256_xor_si256( sc->h[ 0x7 ], sB ); \
c6 = sc->h[ 0x6 ] = _mm256_xor_si256( sc->h[ 0x6 ], sA ); \
c5 = sc->h[ 0x5 ] = _mm256_xor_si256( sc->h[ 0x5 ], s9 ); \
c4 = sc->h[ 0x4 ] = _mm256_xor_si256( sc->h[ 0x4 ], s8 ); \
c3 = sc->h[ 0x3 ] = _mm256_xor_si256( sc->h[ 0x3 ], s3 ); \
c2 = sc->h[ 0x2 ] = _mm256_xor_si256( sc->h[ 0x2 ], s2 ); \
c1 = sc->h[ 0x1 ] = _mm256_xor_si256( sc->h[ 0x1 ], s1 ); \
c0 = sc->h[ 0x0 ] = _mm256_xor_si256( sc->h[ 0x0 ], s0 ); \
} while (0)
void hamsi_big( hamsi_4way_big_context *sc, __m256i *buf, size_t num )
{
DECL_STATE_BIG
sph_u32 tmp;
tmp = SPH_T32( (sph_u32)num << 6 );
sc->count_low = SPH_T32( sc->count_low + tmp );
sc->count_high += (sph_u32)( (num >> 13) >> 13 );
if ( sc->count_low < tmp )
sc->count_high++;
READ_STATE_BIG( sc );
while ( num-- > 0 )
{
__m256i m0, m1, m2, m3, m4, m5, m6, m7;
INPUT_BIG;
P_BIG;
T_BIG;
buf++;
}
WRITE_STATE_BIG( sc );
}
void hamsi_big_final( hamsi_4way_big_context *sc, __m256i *buf )
{
__m256i m0, m1, m2, m3, m4, m5, m6, m7;
DECL_STATE_BIG
READ_STATE_BIG( sc );
INPUT_BIG;
PF_BIG;
T_BIG;
WRITE_STATE_BIG( sc );
}
void hamsi512_4way_init( hamsi_4way_big_context *sc )
{
sc->partial_len = 0;
sph_u32 lo, hi;
sc->count_high = sc->count_low = 0;
for ( int i = 0; i < 8; i++ )
{
lo = 2*i;
hi = 2*i + 1;
sc->h[i] = _mm256_set_epi32( IV512[hi], IV512[lo], IV512[hi], IV512[lo],
IV512[hi], IV512[lo], IV512[hi], IV512[lo] );
}
}
void hamsi512_4way( hamsi_4way_big_context *sc, const void *data, size_t len )
{
__m256i *vdata = (__m256i*)data;
// It looks like the only way to get in here is if core was previously called
// with a very small len
// That's not likely even with 80 byte input so deprecate partial len
/*
if ( sc->partial_len != 0 )
{
size_t mlen;
mlen = 8 - sc->partial_len;
if ( len < mlen )
{
memcpy_256( sc->partial + (sc->partial_len >> 3), data, len>>3 );
sc->partial_len += len;
return;
}
else
{
memcpy_256( sc->partial + (sc->partial_len >> 3), data, mlen>>3 );
len -= mlen;
vdata += mlen>>3;
hamsi_big( sc, sc->partial, 1 );
sc->partial_len = 0;
}
}
*/
hamsi_big( sc, vdata, len>>3 );
vdata += ( (len& ~(size_t)7) >> 3 );
len &= (size_t)7;
memcpy_256( sc->buf, vdata, len>>3 );
sc->partial_len = len;
}
void hamsi512_4way_close( hamsi_4way_big_context *sc, void *dst )
{
__m256i *out = (__m256i*)dst;
__m256i pad[1];
size_t u;
int ch, cl;
sph_enc32be( &ch, sc->count_high );
sph_enc32be( &cl, sc->count_low + ( sc->partial_len << 3 ) );
pad[0] = _mm256_set_epi32( cl, ch, cl, ch, cl, ch, cl, ch );
sc->buf[0] = _mm256_set_epi32( 0UL, 0x80UL, 0UL, 0x80UL,
0UL, 0x80UL, 0UL, 0x80UL );
hamsi_big( sc, sc->buf, 1 );
hamsi_big_final( sc, pad );
for ( u = 0; u < 8; u ++ )
out[u] = mm256_bswap_32( sc->h[u] );
}
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,72 @@
/* $Id: sph_hamsi.h 216 2010-06-08 09:46:57Z tp $ */
/**
* Hamsi interface. This code implements Hamsi with the recommended
* parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_hamsi.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef HAMSI_4WAY_H__
#define HAMSI_4WAY_H__
#include <stddef.h>
#include "algo/sha/sph_types.h"
#if defined (__AVX__)
#include "avxdefs.h"
#ifdef __cplusplus
extern "C"{
#endif
#define SPH_SIZE_hamsi512 512
// Partial is only scalar but needs pointer ref for hamsi-helper
// deprecate partial_len
typedef struct {
__m256i h[8];
__m256i buf[1];
size_t partial_len;
sph_u32 count_high, count_low;
} hamsi_4way_big_context;
typedef hamsi_4way_big_context hamsi512_4way_context;
void hamsi512_4way_init( hamsi512_4way_context *sc );
void hamsi512_4way( hamsi512_4way_context *sc, const void *data, size_t len );
void hamsi512_4way_close( hamsi512_4way_context *sc, void *dst );
#ifdef __cplusplus
}
#endif
#endif
#endif

View File

@@ -36,7 +36,7 @@
#define SPH_HAMSI_H__
#include <stddef.h>
#include "algo/sha3/sph_types.h"
#include "algo/sha/sph_types.h"
#ifdef __cplusplus
extern "C"{

View File

@@ -0,0 +1,115 @@
/* $Id: haval_helper.c 218 2010-06-08 17:06:34Z tp $ */
/*
* Helper code, included (three times !) by HAVAL implementation.
*
* TODO: try to merge this with md_helper.c.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#undef SPH_XCAT
#define SPH_XCAT(a, b) SPH_XCAT_(a, b)
#undef SPH_XCAT_
#define SPH_XCAT_(a, b) a ## b
static void
SPH_XCAT(SPH_XCAT(haval, PASSES), _4way)
( haval_4way_context *sc, const void *data, size_t len )
{
__m128i *vdata = (__m128i*)data;
unsigned current;
current = (unsigned)sc->count_low & 127U;
while ( len > 0 )
{
unsigned clen;
sph_u32 clow, clow2;
clen = 128U - current;
if ( clen > len )
clen = len;
memcpy_128( sc->buf + (current>>2), vdata, clen>>2 );
vdata += clen>>2;
current += clen;
len -= clen;
if ( current == 128U )
{
DSTATE;
IN_PREPARE(sc->buf);
RSTATE;
SPH_XCAT(CORE, PASSES)(INW);
WSTATE;
current = 0;
}
clow = sc->count_low;
clow2 = SPH_T32(clow + clen);
sc->count_low = clow2;
if ( clow2 < clow )
sc->count_high ++;
}
}
static void
SPH_XCAT(SPH_XCAT(haval, PASSES), _4way_close)( haval_4way_context *sc,
void *dst)
{
unsigned current;
DSTATE;
current = (unsigned)sc->count_low & 127UL;
sc->buf[ current>>2 ] = m128_one_32;
current += 4;
RSTATE;
if ( current > 116UL )
{
memset_zero_128( sc->buf + ( current>>2 ), (128UL-current) >> 2 );
do
{
IN_PREPARE(sc->buf);
SPH_XCAT(CORE, PASSES)(INW);
} while (0);
current = 0;
}
uint32_t t1, t2;
memset_zero_128( sc->buf + ( current>>2 ), (116UL-current) >> 2 );
t1 = 0x01 | (PASSES << 3);
t2 = sc->olen << 3;
sc->buf[ 116>>2 ] = _mm_set1_epi32( ( t1 << 16 ) | ( t2 << 24 ) );
sc->buf[ 120>>2 ] = _mm_set1_epi32( sc->count_low << 3 );
sc->buf[ 124>>2 ] = _mm_set1_epi32( (sc->count_high << 3)
| (sc->count_low >> 29) );
do
{
IN_PREPARE(sc->buf);
SPH_XCAT(CORE, PASSES)(INW);
} while (0);
WSTATE;
haval_4way_out( sc, dst );
}

View File

@@ -0,0 +1,522 @@
/* $Id: haval.c 227 2010-06-16 17:28:38Z tp $ */
/*
* HAVAL implementation.
*
* The HAVAL reference paper is of questionable clarity with regards to
* some details such as endianness of bits within a byte, bytes within
* a 32-bit word, or the actual ordering of words within a stream of
* words. This implementation has been made compatible with the reference
* implementation available on: http://labs.calyptix.com/haval.php
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include "haval-hash-4way.h"
#if defined (__AVX__)
#ifdef __cplusplus
extern "C"{
#endif
//#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAVAL
#define SPH_SMALL_FOOTPRINT_HAVAL 1
//#endif
#define F1(x6, x5, x4, x3, x2, x1, x0) \
_mm_xor_si128( x0, \
_mm_xor_si128( _mm_and_si128(_mm_xor_si128( x0, x4 ), x1 ), \
_mm_xor_si128( _mm_and_si128( x2, x5 ), \
_mm_and_si128( x3, x6 ) ) ) ) \
#define F2(x6, x5, x4, x3, x2, x1, x0) \
_mm_xor_si128( \
_mm_and_si128( x2, \
_mm_xor_si128( _mm_andnot_si128( x3, x1 ), \
_mm_xor_si128( _mm_and_si128( x4, x5 ), \
_mm_xor_si128( x6, x0 ) ) ) ), \
_mm_xor_si128( \
_mm_and_si128( x4, _mm_xor_si128( x1, x5 ) ), \
_mm_xor_si128( _mm_and_si128( x3, x5 ), x0 ) ) ) \
#define F3(x6, x5, x4, x3, x2, x1, x0) \
_mm_xor_si128( \
_mm_and_si128( x3, \
_mm_xor_si128( _mm_and_si128( x1, x2 ), \
_mm_xor_si128( x6, x0 ) ) ), \
_mm_xor_si128( _mm_xor_si128(_mm_and_si128( x1, x4 ), \
_mm_and_si128( x2, x5 ) ), x0 ) )
#define F4(x6, x5, x4, x3, x2, x1, x0) \
_mm_xor_si128( \
_mm_xor_si128( \
_mm_and_si128( x3, \
_mm_xor_si128( _mm_xor_si128( _mm_and_si128( x1, x2 ), \
_mm_or_si128( x4, x6 ) ), x5 ) ), \
_mm_and_si128( x4, \
_mm_xor_si128( _mm_xor_si128( _mm_and_si128( mm_not(x2), x5 ), \
_mm_xor_si128( x1, x6 ) ), x0 ) ) ), \
_mm_xor_si128( _mm_and_si128( x2, x6 ), x0 ) )
#define F5(x6, x5, x4, x3, x2, x1, x0) \
_mm_xor_si128( \
_mm_and_si128( x0, \
mm_not( _mm_xor_si128( \
_mm_and_si128( _mm_and_si128( x1, x2 ), x3 ), x5 ) ) ), \
_mm_xor_si128( _mm_xor_si128( _mm_and_si128( x1, x4 ), \
_mm_and_si128( x2, x5 ) ), \
_mm_and_si128( x3, x6 ) ) )
/*
* The macros below integrate the phi() permutations, depending on the
* pass and the total number of passes.
*/
#define FP3_1(x6, x5, x4, x3, x2, x1, x0) \
F1(x1, x0, x3, x5, x6, x2, x4)
#define FP3_2(x6, x5, x4, x3, x2, x1, x0) \
F2(x4, x2, x1, x0, x5, x3, x6)
#define FP3_3(x6, x5, x4, x3, x2, x1, x0) \
F3(x6, x1, x2, x3, x4, x5, x0)
#define FP4_1(x6, x5, x4, x3, x2, x1, x0) \
F1(x2, x6, x1, x4, x5, x3, x0)
#define FP4_2(x6, x5, x4, x3, x2, x1, x0) \
F2(x3, x5, x2, x0, x1, x6, x4)
#define FP4_3(x6, x5, x4, x3, x2, x1, x0) \
F3(x1, x4, x3, x6, x0, x2, x5)
#define FP4_4(x6, x5, x4, x3, x2, x1, x0) \
F4(x6, x4, x0, x5, x2, x1, x3)
#define FP5_1(x6, x5, x4, x3, x2, x1, x0) \
F1(x3, x4, x1, x0, x5, x2, x6)
#define FP5_2(x6, x5, x4, x3, x2, x1, x0) \
F2(x6, x2, x1, x0, x3, x4, x5)
#define FP5_3(x6, x5, x4, x3, x2, x1, x0) \
F3(x2, x6, x0, x4, x3, x1, x5)
#define FP5_4(x6, x5, x4, x3, x2, x1, x0) \
F4(x1, x5, x3, x2, x0, x4, x6)
#define FP5_5(x6, x5, x4, x3, x2, x1, x0) \
F5(x2, x5, x0, x6, x4, x3, x1)
/*
* One step, for "n" passes, pass number "p" (1 <= p <= n), using
* input word number "w" and step constant "c".
*/
#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c) \
do { \
__m128i t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); \
x7 = _mm_add_epi32( _mm_add_epi32( mm_rotr_32( t, 7 ), \
mm_rotr_32( x7, 11 ) ), \
_mm_add_epi32( w, _mm_set1_epi32( c ) ) ); \
} while (0)
/*
* PASSy(n, in) computes pass number "y", for a total of "n", using the
* one-argument macro "in" to access input words. Current state is assumed
* to be held in variables "s0" to "s7".
*/
//#if SPH_SMALL_FOOTPRINT_HAVAL
#define PASS1(n, in) do { \
unsigned pass_count; \
for (pass_count = 0; pass_count < 32; pass_count += 8) { \
STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, \
in(pass_count + 0), SPH_C32(0x00000000)); \
STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, \
in(pass_count + 1), SPH_C32(0x00000000)); \
STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, \
in(pass_count + 2), SPH_C32(0x00000000)); \
STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, \
in(pass_count + 3), SPH_C32(0x00000000)); \
STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, \
in(pass_count + 4), SPH_C32(0x00000000)); \
STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, \
in(pass_count + 5), SPH_C32(0x00000000)); \
STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, \
in(pass_count + 6), SPH_C32(0x00000000)); \
STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, \
in(pass_count + 7), SPH_C32(0x00000000)); \
} \
} while (0)
#define PASSG(p, n, in) do { \
unsigned pass_count; \
for (pass_count = 0; pass_count < 32; pass_count += 8) { \
STEP(n, p, s7, s6, s5, s4, s3, s2, s1, s0, \
in(MP ## p[pass_count + 0]), \
RK ## p[pass_count + 0]); \
STEP(n, p, s6, s5, s4, s3, s2, s1, s0, s7, \
in(MP ## p[pass_count + 1]), \
RK ## p[pass_count + 1]); \
STEP(n, p, s5, s4, s3, s2, s1, s0, s7, s6, \
in(MP ## p[pass_count + 2]), \
RK ## p[pass_count + 2]); \
STEP(n, p, s4, s3, s2, s1, s0, s7, s6, s5, \
in(MP ## p[pass_count + 3]), \
RK ## p[pass_count + 3]); \
STEP(n, p, s3, s2, s1, s0, s7, s6, s5, s4, \
in(MP ## p[pass_count + 4]), \
RK ## p[pass_count + 4]); \
STEP(n, p, s2, s1, s0, s7, s6, s5, s4, s3, \
in(MP ## p[pass_count + 5]), \
RK ## p[pass_count + 5]); \
STEP(n, p, s1, s0, s7, s6, s5, s4, s3, s2, \
in(MP ## p[pass_count + 6]), \
RK ## p[pass_count + 6]); \
STEP(n, p, s0, s7, s6, s5, s4, s3, s2, s1, \
in(MP ## p[pass_count + 7]), \
RK ## p[pass_count + 7]); \
} \
} while (0)
#define PASS2(n, in) PASSG(2, n, in)
#define PASS3(n, in) PASSG(3, n, in)
#define PASS4(n, in) PASSG(4, n, in)
#define PASS5(n, in) PASSG(5, n, in)
static const unsigned MP2[32] = {
5, 14, 26, 18, 11, 28, 7, 16,
0, 23, 20, 22, 1, 10, 4, 8,
30, 3, 21, 9, 17, 24, 29, 6,
19, 12, 15, 13, 2, 25, 31, 27
};
static const unsigned MP3[32] = {
19, 9, 4, 20, 28, 17, 8, 22,
29, 14, 25, 12, 24, 30, 16, 26,
31, 15, 7, 3, 1, 0, 18, 27,
13, 6, 21, 10, 23, 11, 5, 2
};
static const unsigned MP4[32] = {
24, 4, 0, 14, 2, 7, 28, 23,
26, 6, 30, 20, 18, 25, 19, 3,
22, 11, 31, 21, 8, 27, 12, 9,
1, 29, 5, 15, 17, 10, 16, 13
};
static const unsigned MP5[32] = {
27, 3, 21, 26, 17, 11, 20, 29,
19, 0, 12, 7, 13, 8, 31, 10,
5, 9, 14, 30, 18, 6, 28, 24,
2, 23, 16, 22, 4, 1, 25, 15
};
static const sph_u32 RK2[32] = {
SPH_C32(0x452821E6), SPH_C32(0x38D01377),
SPH_C32(0xBE5466CF), SPH_C32(0x34E90C6C),
SPH_C32(0xC0AC29B7), SPH_C32(0xC97C50DD),
SPH_C32(0x3F84D5B5), SPH_C32(0xB5470917),
SPH_C32(0x9216D5D9), SPH_C32(0x8979FB1B),
SPH_C32(0xD1310BA6), SPH_C32(0x98DFB5AC),
SPH_C32(0x2FFD72DB), SPH_C32(0xD01ADFB7),
SPH_C32(0xB8E1AFED), SPH_C32(0x6A267E96),
SPH_C32(0xBA7C9045), SPH_C32(0xF12C7F99),
SPH_C32(0x24A19947), SPH_C32(0xB3916CF7),
SPH_C32(0x0801F2E2), SPH_C32(0x858EFC16),
SPH_C32(0x636920D8), SPH_C32(0x71574E69),
SPH_C32(0xA458FEA3), SPH_C32(0xF4933D7E),
SPH_C32(0x0D95748F), SPH_C32(0x728EB658),
SPH_C32(0x718BCD58), SPH_C32(0x82154AEE),
SPH_C32(0x7B54A41D), SPH_C32(0xC25A59B5)
};
static const sph_u32 RK3[32] = {
SPH_C32(0x9C30D539), SPH_C32(0x2AF26013),
SPH_C32(0xC5D1B023), SPH_C32(0x286085F0),
SPH_C32(0xCA417918), SPH_C32(0xB8DB38EF),
SPH_C32(0x8E79DCB0), SPH_C32(0x603A180E),
SPH_C32(0x6C9E0E8B), SPH_C32(0xB01E8A3E),
SPH_C32(0xD71577C1), SPH_C32(0xBD314B27),
SPH_C32(0x78AF2FDA), SPH_C32(0x55605C60),
SPH_C32(0xE65525F3), SPH_C32(0xAA55AB94),
SPH_C32(0x57489862), SPH_C32(0x63E81440),
SPH_C32(0x55CA396A), SPH_C32(0x2AAB10B6),
SPH_C32(0xB4CC5C34), SPH_C32(0x1141E8CE),
SPH_C32(0xA15486AF), SPH_C32(0x7C72E993),
SPH_C32(0xB3EE1411), SPH_C32(0x636FBC2A),
SPH_C32(0x2BA9C55D), SPH_C32(0x741831F6),
SPH_C32(0xCE5C3E16), SPH_C32(0x9B87931E),
SPH_C32(0xAFD6BA33), SPH_C32(0x6C24CF5C)
};
static const sph_u32 RK4[32] = {
SPH_C32(0x7A325381), SPH_C32(0x28958677),
SPH_C32(0x3B8F4898), SPH_C32(0x6B4BB9AF),
SPH_C32(0xC4BFE81B), SPH_C32(0x66282193),
SPH_C32(0x61D809CC), SPH_C32(0xFB21A991),
SPH_C32(0x487CAC60), SPH_C32(0x5DEC8032),
SPH_C32(0xEF845D5D), SPH_C32(0xE98575B1),
SPH_C32(0xDC262302), SPH_C32(0xEB651B88),
SPH_C32(0x23893E81), SPH_C32(0xD396ACC5),
SPH_C32(0x0F6D6FF3), SPH_C32(0x83F44239),
SPH_C32(0x2E0B4482), SPH_C32(0xA4842004),
SPH_C32(0x69C8F04A), SPH_C32(0x9E1F9B5E),
SPH_C32(0x21C66842), SPH_C32(0xF6E96C9A),
SPH_C32(0x670C9C61), SPH_C32(0xABD388F0),
SPH_C32(0x6A51A0D2), SPH_C32(0xD8542F68),
SPH_C32(0x960FA728), SPH_C32(0xAB5133A3),
SPH_C32(0x6EEF0B6C), SPH_C32(0x137A3BE4)
};
static const sph_u32 RK5[32] = {
SPH_C32(0xBA3BF050), SPH_C32(0x7EFB2A98),
SPH_C32(0xA1F1651D), SPH_C32(0x39AF0176),
SPH_C32(0x66CA593E), SPH_C32(0x82430E88),
SPH_C32(0x8CEE8619), SPH_C32(0x456F9FB4),
SPH_C32(0x7D84A5C3), SPH_C32(0x3B8B5EBE),
SPH_C32(0xE06F75D8), SPH_C32(0x85C12073),
SPH_C32(0x401A449F), SPH_C32(0x56C16AA6),
SPH_C32(0x4ED3AA62), SPH_C32(0x363F7706),
SPH_C32(0x1BFEDF72), SPH_C32(0x429B023D),
SPH_C32(0x37D0D724), SPH_C32(0xD00A1248),
SPH_C32(0xDB0FEAD3), SPH_C32(0x49F1C09B),
SPH_C32(0x075372C9), SPH_C32(0x80991B7B),
SPH_C32(0x25D479D8), SPH_C32(0xF6E8DEF7),
SPH_C32(0xE3FE501A), SPH_C32(0xB6794C3B),
SPH_C32(0x976CE0BD), SPH_C32(0x04C006BA),
SPH_C32(0xC1A94FB6), SPH_C32(0x409F60C4)
};
#define SAVE_STATE \
__m128i u0, u1, u2, u3, u4, u5, u6, u7; \
do { \
u0 = s0; \
u1 = s1; \
u2 = s2; \
u3 = s3; \
u4 = s4; \
u5 = s5; \
u6 = s6; \
u7 = s7; \
} while (0)
#define UPDATE_STATE \
do { \
s0 = _mm_add_epi32( s0, u0 ); \
s1 = _mm_add_epi32( s1, u1 ); \
s2 = _mm_add_epi32( s2, u2 ); \
s3 = _mm_add_epi32( s3, u3 ); \
s4 = _mm_add_epi32( s4, u4 ); \
s5 = _mm_add_epi32( s5, u5 ); \
s6 = _mm_add_epi32( s6, u6 ); \
s7 = _mm_add_epi32( s7, u7 ); \
} while (0)
/*
* COREn(in) performs the core HAVAL computation for "n" passes, using
* the one-argument macro "in" to access the input words. Running state
* is held in variable "s0" to "s7".
*/
/*
#define CORE3(in) do { \
SAVE_STATE; \
PASS1(3, in); \
PASS2(3, in); \
PASS3(3, in); \
UPDATE_STATE; \
} while (0)
#define CORE4(in) do { \
SAVE_STATE; \
PASS1(4, in); \
PASS2(4, in); \
PASS3(4, in); \
PASS4(4, in); \
UPDATE_STATE; \
} while (0)
*/
#define CORE5(in) do { \
SAVE_STATE; \
PASS1(5, in); \
PASS2(5, in); \
PASS3(5, in); \
PASS4(5, in); \
PASS5(5, in); \
UPDATE_STATE; \
} while (0)
/*
* DSTATE declares the state variables "s0" to "s7".
*/
#define DSTATE __m128i s0, s1, s2, s3, s4, s5, s6, s7
/*
* RSTATE fills the state variables from the context "sc".
*/
#define RSTATE \
do { \
s0 = sc->s0; \
s1 = sc->s1; \
s2 = sc->s2; \
s3 = sc->s3; \
s4 = sc->s4; \
s5 = sc->s5; \
s6 = sc->s6; \
s7 = sc->s7; \
} while (0)
/*
* WSTATE updates the context "sc" from the state variables.
*/
#define WSTATE \
do { \
sc->s0 = s0; \
sc->s1 = s1; \
sc->s2 = s2; \
sc->s3 = s3; \
sc->s4 = s4; \
sc->s5 = s5; \
sc->s6 = s6; \
sc->s7 = s7; \
} while (0)
/*
* Initialize a context. "olen" is the output length, in 32-bit words
* (between 4 and 8, inclusive). "passes" is the number of passes
* (3, 4 or 5).
*/
static void
haval_4way_init( haval_4way_context *sc, unsigned olen, unsigned passes )
{
sc->s0 = _mm_set1_epi32( 0x243F6A88UL );
sc->s1 = _mm_set1_epi32( 0x85A308D3UL );
sc->s2 = _mm_set1_epi32( 0x13198A2EUL );
sc->s3 = _mm_set1_epi32( 0x03707344UL );
sc->s4 = _mm_set1_epi32( 0xA4093822UL );
sc->s5 = _mm_set1_epi32( 0x299F31D0UL );
sc->s6 = _mm_set1_epi32( 0x082EFA98UL );
sc->s7 = _mm_set1_epi32( 0xEC4E6C89UL );
sc->olen = olen;
sc->passes = passes;
sc->count_high = 0;
sc->count_low = 0;
}
#define IN_PREPARE(indata) const __m128i *const load_ptr = (indata)
#define INW(i) load_ptr[ i ]
/*
* Write out HAVAL output. The output length is tailored to the requested
* length.
*/
static void
haval_4way_out( haval_4way_context *sc, void *dst )
{
__m128i *buf = (__m128i*)dst;
DSTATE;
RSTATE;
buf[0] = s0;
buf[1] = s1;
buf[2] = s2;
buf[3] = s3;
buf[4] = s4;
buf[5] = s5;
buf[6] = s6;
buf[7] = s7;
}
/*
* The main core functions inline the code with the COREx() macros. We
* use a helper file, included three times, which avoids code copying.
*/
/*
#undef PASSES
#define PASSES 3
#include "haval-helper.c"
#undef PASSES
#define PASSES 4
#include "haval-helper.c"
*/
#undef PASSES
#define PASSES 5
#include "haval-4way-helper.c"
/* ====================================================================== */
#define API(xxx, y) \
void \
haval ## xxx ## _ ## y ## _4way_init(void *cc) \
{ \
haval_4way_init(cc, xxx >> 5, y); \
} \
\
void \
haval ## xxx ## _ ## y ## _4way (void *cc, const void *data, size_t len) \
{ \
haval ## y ## _4way(cc, data, len); \
} \
\
void \
haval ## xxx ## _ ## y ## _4way_close(void *cc, void *dst) \
{ \
haval ## y ## _4way_close(cc, dst); \
} \
API(256, 5)
#define RVAL \
do { \
s0 = val[0]; \
s1 = val[1]; \
s2 = val[2]; \
s3 = val[3]; \
s4 = val[4]; \
s5 = val[5]; \
s6 = val[6]; \
s7 = val[7]; \
} while (0)
#define WVAL \
do { \
val[0] = s0; \
val[1] = s1; \
val[2] = s2; \
val[3] = s3; \
val[4] = s4; \
val[5] = s5; \
val[6] = s6; \
val[7] = s7; \
} while (0)
#define INMSG(i) msg[i]
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,95 @@
/* $Id: sph_haval.h 218 2010-06-08 17:06:34Z tp $ */
/**
* HAVAL interface.
*
* HAVAL is actually a family of 15 hash functions, depending on whether
* the internal computation uses 3, 4 or 5 passes, and on the output
* length, which is 128, 160, 192, 224 or 256 bits. This implementation
* provides interface functions for all 15, which internally map to
* three cores (depending on the number of passes). Note that output
* lengths other than 256 bits are not obtained by a simple truncation
* of a longer result; the requested length is encoded within the
* padding data.
*
* HAVAL was published in: Yuliang Zheng, Josef Pieprzyk and Jennifer
* Seberry: "HAVAL -- a one-way hashing algorithm with variable length
* of output", Advances in Cryptology -- AUSCRYPT'92, Lecture Notes in
* Computer Science, Vol.718, pp.83-104, Springer-Verlag, 1993.
*
* This paper, and a reference implementation, are available on the
* Calyptix web site: http://labs.calyptix.com/haval.php
*
* The HAVAL reference paper is quite unclear on the data encoding
* details, i.e. endianness (both byte order within a 32-bit word, and
* word order within a message block). This implementation has been
* made compatible with the reference implementation referenced above.
*
* @warning A collision for HAVAL-128/3 (HAVAL with three passes and
* 128-bit output) has been published; this function is thus considered
* as cryptographically broken. The status for other variants is unclear;
* use only with care.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_haval.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef HAVAL_HASH_4WAY_H__
#define HAVAL_HASH_4WAY_H__
#if defined(__AVX__)
#ifdef __cplusplus
extern "C"{
#endif
#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
#define SPH_SIZE_haval256_5 256
typedef struct {
__m128i buf[32];
__m128i s0, s1, s2, s3, s4, s5, s6, s7;
unsigned olen, passes;
sph_u32 count_high, count_low;
} haval_4way_context;
typedef haval_4way_context haval256_5_4way_context;
void haval256_5_4way_init( void *cc );
void haval256_5_4way( void *cc, const void *data, size_t len );
void haval256_5_4way_close( void *cc, void *dst );
#ifdef __cplusplus
}
#endif
#endif
#endif

View File

@@ -66,7 +66,7 @@ extern "C"{
#endif
#include <stddef.h>
#include "algo/sha3/sph_types.h"
#include "algo/sha/sph_types.h"
/**
* Output size (in bits) for HAVAL-128/3.

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdio.h>
@@ -16,7 +15,7 @@
#include "algo/shabal/sph_shabal.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/luffa/sse2/luffa_for_sse2.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/skein/sse2/skein.c"
#ifndef NO_AES_NI
@@ -25,7 +24,7 @@
void bastionhash(void *output, const void *input)
{
unsigned char _ALIGN(128) hash[64] = { 0 };
unsigned char hash[64] __attribute__ ((aligned (64)));
#ifdef NO_AES_NI
sph_echo512_context ctx_echo;
@@ -36,7 +35,6 @@ void bastionhash(void *output, const void *input)
sph_fugue512_context ctx_fugue;
sph_whirlpool_context ctx_whirlpool;
sph_shabal512_context ctx_shabal;
// sph_skein512_context ctx_skein;
sph_hamsi512_context ctx_hamsi;
unsigned char hashbuf[128] __attribute__ ((aligned (16)));
@@ -47,8 +45,10 @@ void bastionhash(void *output, const void *input)
HEFTY1(input, 80, hash);
init_luffa( &ctx_luffa, 512 );
update_luffa( &ctx_luffa, hash, 64 );
final_luffa( &ctx_luffa, hash );
update_and_final_luffa( &ctx_luffa, (BitSequence*)hash,
(const BitSequence*)hash, 64 );
// update_luffa( &ctx_luffa, hash, 64 );
// final_luffa( &ctx_luffa, hash );
if (hash[0] & 0x8)
{
@@ -60,9 +60,6 @@ void bastionhash(void *output, const void *input)
SKN_I;
SKN_U;
SKN_C;
// sph_skein512_init(&ctx_skein);
// sph_skein512(&ctx_skein, hash, 64);
// sph_skein512_close(&ctx_skein, hash);
}
sph_whirlpool_init(&ctx_whirlpool);
@@ -81,13 +78,17 @@ void bastionhash(void *output, const void *input)
sph_echo512_close(&ctx_echo, hash);
#else
init_echo( &ctx_echo, 512 );
update_echo ( &ctx_echo, hash, 512 );
final_echo( &ctx_echo, hash );
update_final_echo ( &ctx_echo,(BitSequence*)hash,
(const BitSequence*)hash, 512 );
// update_echo ( &ctx_echo, hash, 512 );
// final_echo( &ctx_echo, hash );
#endif
} else {
init_luffa( &ctx_luffa, 512 );
update_luffa( &ctx_luffa, hash, 64 );
final_luffa( &ctx_luffa, hash );
update_and_final_luffa( &ctx_luffa, (BitSequence*)hash,
(const BitSequence*)hash, 64 );
// update_luffa( &ctx_luffa, hash, 64 );
// final_luffa( &ctx_luffa, hash );
}
sph_shabal512_init(&ctx_shabal);
@@ -98,9 +99,6 @@ void bastionhash(void *output, const void *input)
SKN_I;
SKN_U;
SKN_C;
// sph_skein512_init(&ctx_skein);
// sph_skein512(&ctx_skein, hash, 64);
// sph_skein512_close(&ctx_skein, hash);
if (hash[0] & 0x8)
{
@@ -124,8 +122,10 @@ void bastionhash(void *output, const void *input)
sph_hamsi512_close(&ctx_hamsi, hash);
} else {
init_luffa( &ctx_luffa, 512 );
update_luffa( &ctx_luffa, hash, 64 );
final_luffa( &ctx_luffa, hash );
update_and_final_luffa( &ctx_luffa, (BitSequence*)hash,
(const BitSequence*)hash, 64 );
// update_luffa( &ctx_luffa, hash, 64 );
// final_luffa( &ctx_luffa, hash );
}
memcpy(output, hash, 32);
@@ -170,7 +170,6 @@ bool register_bastion_algo( algo_gate_t* gate )
gate->optimizations = SSE2_OPT | AES_OPT;
gate->scanhash = (void*)&scanhash_bastion;
gate->hash = (void*)&bastionhash;
gate->hash_alt = (void*)&bastionhash;
return true;
};

View File

@@ -2,7 +2,6 @@
#include <openssl/sha.h>
#include <stdint.h>
#include "miner.h"
#include "algo-gate-api.h"
#include "sph_hefty1.h"
#include "algo/keccak/sph_keccak.h"

View File

@@ -1,171 +0,0 @@
// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2013 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_PRIMITIVES_BLOCK_H
#define BITCOIN_PRIMITIVES_BLOCK_H
#include "serialize.h"
#include "hodl_uint256.h"
/** Nodes collect new transactions into a block, hash them into a hash tree,
* and scan through nonce values to make the block's hash satisfy proof-of-work
* requirements. When they solve the proof-of-work, they broadcast the block
* to everyone and the block is added to the block chain. The first transaction
* in the block is a special one that creates a new coin owned by the creator
* of the block.
*/
class CBlockHeader
{
public:
// header
static const int32_t CURRENT_VERSION=4;
int32_t nVersion;
uint256 hashPrevBlock;
uint256 hashMerkleRoot;
uint32_t nTime;
uint32_t nBits;
uint32_t nNonce;
uint32_t nStartLocation;
uint32_t nFinalCalculation;
CBlockHeader()
{
SetNull();
}
ADD_SERIALIZE_METHODS;
template <typename Stream, typename Operation>
inline void SerializationOp(Stream& s, Operation ser_action, int nType, int nVersion) {
READWRITE(this->nVersion);
nVersion = this->nVersion;
READWRITE(hashPrevBlock);
READWRITE(hashMerkleRoot);
READWRITE(nTime);
READWRITE(nBits);
READWRITE(nNonce);
READWRITE(nStartLocation);
READWRITE(nFinalCalculation);
}
void SetNull()
{
nVersion = CBlockHeader::CURRENT_VERSION;
hashPrevBlock.SetNull();
hashMerkleRoot.SetNull();
nTime = 0;
nBits = 0;
nNonce = 0;
nStartLocation = 0;
nFinalCalculation = 0;
}
bool IsNull() const
{
return (nBits == 0);
}
uint256 GetHash() const;
uint256 GetMidHash() const;
uint256 FindBestPatternHash(int& collisions,char *scratchpad,int nThreads);
uint256 FindBestPatternHash(int& collisions,char *scratchpad);
int64_t GetBlockTime() const
{
return (int64_t)nTime;
}
};
class CBlock : public CBlockHeader
{
public:
// network and disk
//std::vector<CTransaction> vtx;
std::vector<int> vtx;
// memory only
mutable std::vector<uint256> vMerkleTree;
CBlock()
{
SetNull();
}
CBlock(const CBlockHeader &header)
{
SetNull();
*((CBlockHeader*)this) = header;
}
ADD_SERIALIZE_METHODS;
template <typename Stream, typename Operation>
inline void SerializationOp(Stream& s, Operation ser_action, int nType, int nVersion) {
READWRITE(*(CBlockHeader*)this);
READWRITE(vtx);
}
void SetNull()
{
CBlockHeader::SetNull();
vtx.clear();
vMerkleTree.clear();
}
CBlockHeader GetBlockHeader() const
{
CBlockHeader block;
block.nVersion = nVersion;
block.hashPrevBlock = hashPrevBlock;
block.hashMerkleRoot = hashMerkleRoot;
block.nTime = nTime;
block.nBits = nBits;
block.nNonce = nNonce;
block.nStartLocation = nStartLocation;
block.nFinalCalculation = nFinalCalculation;
return block;
}
std::string ToString() const;
};
/** Describes a place in the block chain to another node such that if the
* other node doesn't have the same branch, it can find a recent common trunk.
* The further back it is, the further before the fork it may be.
*/
struct CBlockLocator
{
std::vector<uint256> vHave;
CBlockLocator() {}
CBlockLocator(const std::vector<uint256>& vHaveIn)
{
vHave = vHaveIn;
}
ADD_SERIALIZE_METHODS;
template <typename Stream, typename Operation>
inline void SerializationOp(Stream& s, Operation ser_action, int nType, int nVersion) {
if (!(nType & SER_GETHASH))
READWRITE(nVersion);
READWRITE(vHave);
}
void SetNull()
{
vHave.clear();
}
bool IsNull() const
{
return vHave.empty();
}
};
#endif // BITCOIN_PRIMITIVES_BLOCK_H

View File

@@ -1,70 +0,0 @@
// Copyright (c) 2014 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_CRYPTO_COMMON_H
#define BITCOIN_CRYPTO_COMMON_H
#if defined(HAVE_CONFIG_H)
#include "bitcoin-config.h"
#endif
#if ((defined(_WIN64) || defined(__WINDOWS__)))
#include "hodl-endian.h"
#endif
#include <stdint.h>
uint16_t static inline ReadLE16(const unsigned char* ptr)
{
return le16toh(*((uint16_t*)ptr));
}
uint32_t static inline ReadLE32(const unsigned char* ptr)
{
return le32toh(*((uint32_t*)ptr));
}
uint64_t static inline ReadLE64(const unsigned char* ptr)
{
return le64toh(*((uint64_t*)ptr));
}
void static inline WriteLE16(unsigned char* ptr, uint16_t x)
{
*((uint16_t*)ptr) = htole16(x);
}
void static inline WriteLE32(unsigned char* ptr, uint32_t x)
{
*((uint32_t*)ptr) = htole32(x);
}
void static inline WriteLE64(unsigned char* ptr, uint64_t x)
{
*((uint64_t*)ptr) = htole64(x);
}
uint32_t static inline ReadBE32(const unsigned char* ptr)
{
return be32toh(*((uint32_t*)ptr));
}
uint64_t static inline ReadBE64(const unsigned char* ptr)
{
return be64toh(*((uint64_t*)ptr));
}
void static inline WriteBE32(unsigned char* ptr, uint32_t x)
{
*((uint32_t*)ptr) = htobe32(x);
}
void static inline WriteBE64(unsigned char* ptr, uint64_t x)
{
*((uint64_t*)ptr) = htobe64(x);
}
#endif // BITCOIN_CRYPTO_COMMON_H
//#endif

View File

@@ -1,83 +0,0 @@
// Copyright (c) 2013-2014 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include "hash.h"
#include "common.h"
#include "hmac_sha512.h"
inline uint32_t ROTL32(uint32_t x, int8_t r)
{
return (x << r) | (x >> (32 - r));
}
unsigned int MurmurHash3(unsigned int nHashSeed, const std::vector<unsigned char>& vDataToHash)
{
// The following is MurmurHash3 (x86_32), see http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
uint32_t h1 = nHashSeed;
if (vDataToHash.size() > 0)
{
const uint32_t c1 = 0xcc9e2d51;
const uint32_t c2 = 0x1b873593;
const int nblocks = vDataToHash.size() / 4;
//----------
// body
const uint8_t* blocks = &vDataToHash[0] + nblocks * 4;
for (int i = -nblocks; i; i++) {
uint32_t k1 = ReadLE32(blocks + i*4);
k1 *= c1;
k1 = ROTL32(k1, 15);
k1 *= c2;
h1 ^= k1;
h1 = ROTL32(h1, 13);
h1 = h1 * 5 + 0xe6546b64;
}
//----------
// tail
const uint8_t* tail = (const uint8_t*)(&vDataToHash[0] + nblocks * 4);
uint32_t k1 = 0;
switch (vDataToHash.size() & 3) {
case 3:
k1 ^= tail[2] << 16;
case 2:
k1 ^= tail[1] << 8;
case 1:
k1 ^= tail[0];
k1 *= c1;
k1 = ROTL32(k1, 15);
k1 *= c2;
h1 ^= k1;
};
}
//----------
// finalization
h1 ^= vDataToHash.size();
h1 ^= h1 >> 16;
h1 *= 0x85ebca6b;
h1 ^= h1 >> 13;
h1 *= 0xc2b2ae35;
h1 ^= h1 >> 16;
return h1;
}
void BIP32Hash(const ChainCode &chainCode, unsigned int nChild, unsigned char header, const unsigned char data[32], unsigned char output[64])
{
unsigned char num[4];
num[0] = (nChild >> 24) & 0xFF;
num[1] = (nChild >> 16) & 0xFF;
num[2] = (nChild >> 8) & 0xFF;
num[3] = (nChild >> 0) & 0xFF;
CHMAC_SHA512(chainCode.begin(), chainCode.size()).Write(&header, 1).Write(data, 32).Write(num, 4).Finalize(output);
}

View File

@@ -1,176 +0,0 @@
// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2013 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_HASH_H
#define BITCOIN_HASH_H
#include <iostream>
//#include "ripemd160.h"
#include "sha256.h"
#include "serialize.h"
#include "hodl_uint256.h"
//#include "version.h"
#include <vector>
static const int PROTOCOL_VERSION = 70002;
typedef uint256 ChainCode;
/** A hasher class for Bitcoin's 256-bit hash (double SHA-256). */
class CHash256 {
private:
CSHA256 sha;
public:
static const size_t OUTPUT_SIZE = CSHA256::OUTPUT_SIZE;
void Finalize(unsigned char hash[OUTPUT_SIZE]) {
unsigned char buf[sha.OUTPUT_SIZE];
sha.Finalize(buf);
sha.Reset().Write(buf, sha.OUTPUT_SIZE).Finalize(hash);
}
CHash256& Write(const unsigned char *data, size_t len) {
sha.Write(data, len);
return *this;
}
CHash256& Reset() {
sha.Reset();
return *this;
}
};
/** A hasher class for Bitcoin's 160-bit hash (SHA-256 + RIPEMD-160). */
/*
class CHash160 {
private:
CSHA256 sha;
public:
static const size_t OUTPUT_SIZE = CRIPEMD160::OUTPUT_SIZE;
void Finalize(unsigned char hash[OUTPUT_SIZE]) {
unsigned char buf[sha.OUTPUT_SIZE];
sha.Finalize(buf);
CRIPEMD160().Write(buf, sha.OUTPUT_SIZE).Finalize(hash);
}
CHash160& Write(const unsigned char *data, size_t len) {
sha.Write(data, len);
return *this;
}
CHash160& Reset() {
sha.Reset();
return *this;
}
};
*/
/** Compute the 256-bit hash of an object. */
template<typename T1>
inline uint256 Hash(const T1 pbegin, const T1 pend)
{
static const unsigned char pblank[1] = {};
uint256 result;
CHash256().Write(pbegin == pend ? pblank : (const unsigned char*)&pbegin[0], (pend - pbegin) * sizeof(pbegin[0]))
.Finalize((unsigned char*)&result);
return result;
}
/** Compute the 256-bit hash of the concatenation of two objects. */
template<typename T1, typename T2>
inline uint256 Hash(const T1 p1begin, const T1 p1end,
const T2 p2begin, const T2 p2end) {
static const unsigned char pblank[1] = {};
uint256 result;
CHash256().Write(p1begin == p1end ? pblank : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0]))
.Write(p2begin == p2end ? pblank : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0]))
.Finalize((unsigned char*)&result);
return result;
}
/** Compute the 256-bit hash of the concatenation of three objects. */
template<typename T1, typename T2, typename T3>
inline uint256 Hash(const T1 p1begin, const T1 p1end,
const T2 p2begin, const T2 p2end,
const T3 p3begin, const T3 p3end) {
static const unsigned char pblank[1] = {};
uint256 result;
CHash256().Write(p1begin == p1end ? pblank : (const unsigned char*)&p1begin[0], (p1end - p1begin) * sizeof(p1begin[0]))
.Write(p2begin == p2end ? pblank : (const unsigned char*)&p2begin[0], (p2end - p2begin) * sizeof(p2begin[0]))
.Write(p3begin == p3end ? pblank : (const unsigned char*)&p3begin[0], (p3end - p3begin) * sizeof(p3begin[0]))
.Finalize((unsigned char*)&result);
return result;
}
/** Compute the 160-bit hash an object. */
/*
template<typename T1>
inline uint160 Hash160(const T1 pbegin, const T1 pend)
{
static unsigned char pblank[1] = {};
uint160 result;
CHash160().Write(pbegin == pend ? pblank : (const unsigned char*)&pbegin[0], (pend - pbegin) * sizeof(pbegin[0]))
.Finalize((unsigned char*)&result);
return result;
}
*/
/** Compute the 160-bit hash of a vector. */
/*
inline uint160 Hash160(const std::vector<unsigned char>& vch)
{
return Hash160(vch.begin(), vch.end());
}
*/
/** A writer stream (for serialization) that computes a 256-bit hash. */
class CHashWriter
{
private:
CHash256 ctx;
public:
int nType;
int nVersion;
CHashWriter(int nTypeIn, int nVersionIn) : nType(nTypeIn), nVersion(nVersionIn) {}
CHashWriter& write(const char *pch, size_t size) {
ctx.Write((const unsigned char*)pch, size);
return (*this);
}
// invalidates the object
uint256 GetHash() {
uint256 result;
ctx.Finalize((unsigned char*)&result);
return result;
}
template<typename T>
CHashWriter& operator<<(const T& obj) {
// Serialize to this stream
::Serialize(*this, obj, nType, nVersion);
return (*this);
}
};
/** Compute the 256-bit hash of an object's serialization. */
template<typename T>
uint256 SerializeHash(const T& obj, int nType=SER_GETHASH, int nVersion=PROTOCOL_VERSION)
{
CHashWriter ss(nType, nVersion);
ss << obj;
return ss.GetHash();
}
unsigned int MurmurHash3(unsigned int nHashSeed, const std::vector<unsigned char>& vDataToHash);
void BIP32Hash(const ChainCode &chainCode, unsigned int nChild, unsigned char header, const unsigned char data[32], unsigned char output[64]);
#endif // BITCOIN_HASH_H

View File

@@ -1,33 +0,0 @@
// Copyright (c) 2014 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include "hmac_sha512.h"
#include <string.h>
CHMAC_SHA512::CHMAC_SHA512(const unsigned char* key, size_t keylen)
{
unsigned char rkey[128];
if (keylen <= 128) {
memcpy(rkey, key, keylen);
memset(rkey + keylen, 0, 128 - keylen);
} else {
CSHA512().Write(key, keylen).Finalize(rkey);
memset(rkey + 64, 0, 64);
}
for (int n = 0; n < 128; n++)
rkey[n] ^= 0x5c;
outer.Write(rkey, 128);
for (int n = 0; n < 128; n++)
rkey[n] ^= 0x5c ^ 0x36;
inner.Write(rkey, 128);
}
void CHMAC_SHA512::Finalize(unsigned char hash[OUTPUT_SIZE])
{
unsigned char temp[64];
inner.Finalize(temp);
outer.Write(temp, 64).Finalize(hash);
}

View File

@@ -1,32 +0,0 @@
// Copyright (c) 2014 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_CRYPTO_HMAC_SHA512_H
#define BITCOIN_CRYPTO_HMAC_SHA512_H
#include "sha512.h"
#include <stdint.h>
#include <stdlib.h>
/** A hasher class for HMAC-SHA-512. */
class CHMAC_SHA512
{
private:
CSHA512 outer;
CSHA512 inner;
public:
static const size_t OUTPUT_SIZE = 64;
CHMAC_SHA512(const unsigned char* key, size_t keylen);
CHMAC_SHA512& Write(const unsigned char* data, size_t len)
{
inner.Write(data, len);
return *this;
}
void Finalize(unsigned char hash[OUTPUT_SIZE]);
};
#endif // BITCOIN_CRYPTO_HMAC_SHA512_H

View File

@@ -1,10 +1,7 @@
#include <memory.h>
#include <stdlib.h>
#include "miner.h"
//#include "algo-gate-api.h"
#include "hodl-gate.h"
#include "hodl.h"
#include "hodl-wolf.h"
#define HODL_NSTARTLOC_INDEX 20
@@ -97,19 +94,20 @@ bool hodl_do_this_thread( int thr_id )
int hodl_scanhash( int thr_id, struct work* work, uint32_t max_nonce,
uint64_t *hashes_done )
{
#ifdef NO_AES_NI
GetPsuedoRandomData( hodl_scratchbuf, work->data, thr_id );
pthread_barrier_wait( &hodl_barrier );
return scanhash_hodl( thr_id, work, max_nonce, hashes_done );
#else
GenRandomGarbage( hodl_scratchbuf, work->data, thr_id );
#ifndef NO_AES_NI
GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, thr_id );
pthread_barrier_wait( &hodl_barrier );
return scanhash_hodl_wolf( thr_id, work, max_nonce, hashes_done );
#endif
return false;
}
bool register_hodl_algo( algo_gate_t* gate )
{
#ifdef NO_AES_NI
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
return false;
#endif
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->scanhash = (void*)&hodl_scanhash;

View File

@@ -13,35 +13,41 @@
void GenerateGarbageCore(CacheEntry *Garbage, int ThreadID, int ThreadCount, void *MidHash)
{
#ifdef __AVX__
uint64_t* TempBufs[SHA512_PARALLEL_N];
uint64_t* TempBufs[SHA512_PARALLEL_N] ;
uint64_t* desination[SHA512_PARALLEL_N];
for (int i=0; i<SHA512_PARALLEL_N; ++i) {
for ( int i=0; i<SHA512_PARALLEL_N; ++i )
{
TempBufs[i] = (uint64_t*)malloc(32);
memcpy(TempBufs[i], MidHash, 32);
}
uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
for(uint32_t i = StartChunk; i < StartChunk + (TOTAL_CHUNKS / ThreadCount); i+= SHA512_PARALLEL_N) {
for(int j=0; j<SHA512_PARALLEL_N; ++j) {
((uint32_t*)TempBufs[j])[0] = i + j;
desination[j] = (uint64_t*)((uint8_t *)Garbage + ((i+j) * GARBAGE_CHUNK_SIZE));
for ( uint32_t i = StartChunk;
i < StartChunk + (TOTAL_CHUNKS / ThreadCount); i+= SHA512_PARALLEL_N )
{
for ( int j=0; j<SHA512_PARALLEL_N; ++j )
{
( (uint32_t*)TempBufs[j] )[0] = i + j;
desination[j] = (uint64_t*)( (uint8_t *)Garbage + ( (i+j)
* GARBAGE_CHUNK_SIZE ) );
}
sha512Compute32b_parallel(TempBufs, desination);
sha512Compute32b_parallel( TempBufs, desination );
}
for (int i=0; i<SHA512_PARALLEL_N; ++i) {
free(TempBufs[i]);
}
for ( int i=0; i<SHA512_PARALLEL_N; ++i )
free( TempBufs[i] );
#else
uint32_t TempBuf[8];
memcpy(TempBuf, MidHash, 32);
memcpy( TempBuf, MidHash, 32 );
uint32_t StartChunk = ThreadID * (TOTAL_CHUNKS / ThreadCount);
for(uint32_t i = StartChunk; i < StartChunk + (TOTAL_CHUNKS / ThreadCount); ++i)
for ( uint32_t i = StartChunk;
i < StartChunk + (TOTAL_CHUNKS / ThreadCount); ++i )
{
TempBuf[0] = i;
SHA512((uint8_t *)TempBuf, 32, ((uint8_t *)Garbage) + (i * GARBAGE_CHUNK_SIZE));
SHA512( ( uint8_t *)TempBuf, 32,
( (uint8_t *)Garbage ) + ( i * GARBAGE_CHUNK_SIZE ) );
}
#endif
}

View File

@@ -1,168 +0,0 @@
#include "miner.h"
#include "hodl-gate.h"
#include "hodl_uint256.h"
#include "hodl_arith_uint256.h"
#include "block.h"
#include <sstream>
#include "tinyformat.h"
#include <unordered_map>
#include "hash.h"
#include <openssl/aes.h>
#include <openssl/evp.h>
#include <openssl/sha.h>
#define BEGIN(a) ((char*)&(a))
#define END(a) ((char*)&((&(a))[1]))
#define PSUEDORANDOM_DATA_SIZE 30 //2^30 = 1GB
#define PSUEDORANDOM_DATA_CHUNK_SIZE 6 //2^6 = 64 bytes //must be same as SHA512_DIGEST_LENGTH 64
#define L2CACHE_TARGET 12 // 2^12 = 4096 bytes
#define AES_ITERATIONS 15
void SHA512Filler(char *mainMemoryPsuedoRandomData, int threadNumber, uint256 midHash){
//Generate psuedo random data to store in main memory
uint32_t chunks=(1<<(PSUEDORANDOM_DATA_SIZE-PSUEDORANDOM_DATA_CHUNK_SIZE)); //2^(30-6) = 16 mil
uint32_t chunkSize=(1<<(PSUEDORANDOM_DATA_CHUNK_SIZE)); //2^6 = 64 bytes
unsigned char hash_tmp[sizeof(midHash)];
memcpy((char*)&hash_tmp[0], (char*)&midHash, sizeof(midHash) );
uint32_t* index = (uint32_t*)hash_tmp;
// uint32_t chunksToProcess=chunks/totalThreads;
uint32_t chunksToProcess = chunks / opt_n_threads;
uint32_t startChunk=threadNumber*chunksToProcess;
for( uint32_t i = startChunk; i < startChunk+chunksToProcess; i++){
//This changes the first character of hash_tmp
*index = i;
SHA512((unsigned char*)hash_tmp, sizeof(hash_tmp), (unsigned char*)&(mainMemoryPsuedoRandomData[i*chunkSize]));
}
}
extern "C"
// max_nonce is not used by this function
int scanhash_hodl( int threadNumber, struct work* work, uint32_t max_nonce,
uint64_t *hashes_done )
{
unsigned char *mainMemoryPsuedoRandomData = hodl_scratchbuf;
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
//retreive target
std::stringstream s;
for (int i = 7; i>=0; i--)
s << strprintf("%08x", ptarget[i]);
//retreive preveios hash
std::stringstream p;
for (int i = 0; i < 8; i++)
p << strprintf("%08x", swab32(pdata[8 - i]));
//retreive merkleroot
std::stringstream m;
for (int i = 0; i < 8; i++)
m << strprintf("%08x", swab32(pdata[16 - i]));
CBlock pblock;
pblock.SetNull();
pblock.nVersion=swab32(pdata[0]);
pblock.nNonce=swab32(pdata[19]);
pblock.nTime=swab32(pdata[17]);
pblock.nBits=swab32(pdata[18]);
pblock.hashPrevBlock=uint256S(p.str());
pblock.hashMerkleRoot=uint256S(m.str());
uint256 hashTarget=uint256S(s.str());
int collisions=0;
uint256 hash;
//Begin AES Search
//Allocate temporary memory
uint32_t cacheMemorySize = (1<<L2CACHE_TARGET); //2^12 = 4096 bytes
uint32_t comparisonSize=(1<<(PSUEDORANDOM_DATA_SIZE-L2CACHE_TARGET)); //2^(30-12) = 256K
unsigned char *cacheMemoryOperatingData;
unsigned char *cacheMemoryOperatingData2;
cacheMemoryOperatingData=new unsigned char[cacheMemorySize+16];
cacheMemoryOperatingData2=new unsigned char[cacheMemorySize];
//Create references to data as 32 bit arrays
uint32_t* cacheMemoryOperatingData32 = (uint32_t*)cacheMemoryOperatingData;
uint32_t* cacheMemoryOperatingData322 = (uint32_t*)cacheMemoryOperatingData2;
//Search for pattern in psuedorandom data
unsigned char key[32] = {0};
unsigned char iv[AES_BLOCK_SIZE];
int outlen1, outlen2;
//Iterate over the data
// int searchNumber=comparisonSize/totalThreads;
int searchNumber = comparisonSize / opt_n_threads;
int startLoc=threadNumber*searchNumber;
EVP_CIPHER_CTX ctx;
for(int32_t k = startLoc;k<startLoc+searchNumber && !work_restart[threadNumber].restart;k++){
//copy data to first l2 cache
memcpy((char*)&cacheMemoryOperatingData[0], (char*)&mainMemoryPsuedoRandomData[k*cacheMemorySize], cacheMemorySize);
for(int j=0;j<AES_ITERATIONS;j++){
//use last 4 bytes of first cache as next location
uint32_t nextLocation = cacheMemoryOperatingData32[(cacheMemorySize/4)-1]%comparisonSize;
//Copy data from indicated location to second l2 cache -
memcpy((char*)&cacheMemoryOperatingData2[0], (char*)&mainMemoryPsuedoRandomData[nextLocation*cacheMemorySize], cacheMemorySize);
//XOR location data into second cache
for(uint32_t i = 0; i < cacheMemorySize/4; i++)
cacheMemoryOperatingData322[i] = cacheMemoryOperatingData32[i] ^ cacheMemoryOperatingData322[i];
memcpy(key,(unsigned char*)&cacheMemoryOperatingData2[cacheMemorySize-32],32);
memcpy(iv,(unsigned char*)&cacheMemoryOperatingData2[cacheMemorySize-AES_BLOCK_SIZE],AES_BLOCK_SIZE);
EVP_EncryptInit(&ctx, EVP_aes_256_cbc(), key, iv);
EVP_EncryptUpdate(&ctx, cacheMemoryOperatingData, &outlen1, cacheMemoryOperatingData2, cacheMemorySize);
EVP_EncryptFinal(&ctx, cacheMemoryOperatingData + outlen1, &outlen2);
EVP_CIPHER_CTX_cleanup(&ctx);
}
//use last X bits as solution
uint32_t solution=cacheMemoryOperatingData32[(cacheMemorySize/4)-1]%comparisonSize;
if(solution<1000){
uint32_t proofOfCalculation=cacheMemoryOperatingData32[(cacheMemorySize/4)-2];
pblock.nStartLocation = k;
pblock.nFinalCalculation = proofOfCalculation;
hash = Hash(BEGIN(pblock.nVersion), END(pblock.nFinalCalculation));
collisions++;
if (UintToArith256(hash) <= UintToArith256(hashTarget) && !work_restart[threadNumber].restart){
pdata[21] = swab32(pblock.nFinalCalculation);
pdata[20] = swab32(pblock.nStartLocation);
*hashes_done = collisions;
//free memory
delete [] cacheMemoryOperatingData;
delete [] cacheMemoryOperatingData2;
return 1;
}
}
}
//free memory
delete [] cacheMemoryOperatingData;
delete [] cacheMemoryOperatingData2;
*hashes_done = collisions;
return 0;
}
extern "C"
void GetPsuedoRandomData( char* mainMemoryPsuedoRandomData, uint32_t *pdata,
int thr_id )
{
//retreive preveios hash
std::stringstream p;
for (int i = 0; i < 8; i++)
p << strprintf("%08x", swab32(pdata[8 - i]));
//retreive merkleroot
std::stringstream m;
for (int i = 0; i < 8; i++)
m << strprintf("%08x", swab32(pdata[16 - i]));
CBlock pblock;
pblock.SetNull();
pblock.nVersion=swab32(pdata[0]);
pblock.nTime=swab32(pdata[17]);
pblock.nBits=swab32(pdata[18]);
pblock.hashPrevBlock= uint256S(p.str());
pblock.hashMerkleRoot= uint256S(m.str());
pblock.nNonce=swab32(pdata[19]);
uint256 midHash = Hash(BEGIN(pblock.nVersion), END(pblock.nNonce));
SHA512Filler( mainMemoryPsuedoRandomData, thr_id, midHash);
}

View File

@@ -1,11 +0,0 @@
extern int scanhash_hodl( int thr_id, struct work* work, uint32_t max_nonce,
uint64_t *hashes_done );
extern void GetPsuedoRandomData( char* mainMemoryPsuedoRandomData,
uint32_t *pdata, int thr_id );
void hodl_set_target( struct work* work, double diff );
void hodl_copy_workdata( struct work* work, struct work* g_work );

Some files were not shown because too many files have changed in this diff Show More