mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
26 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
3c02653dbe | ||
![]() |
502ed0b1fe | ||
![]() |
d60a268972 | ||
![]() |
e4265a6f11 | ||
![]() |
a28daca3ce | ||
![]() |
54b8fd7362 | ||
![]() |
ad2275f74a | ||
![]() |
a90d75b8f5 | ||
![]() |
bee78eac76 | ||
![]() |
2d2e54f001 | ||
![]() |
79164c24b5 | ||
![]() |
7a1389998b | ||
![]() |
af1c940919 | ||
![]() |
4b57ac0eb9 | ||
![]() |
6d1361c87f | ||
![]() |
ab39e88318 | ||
![]() |
8ff52e7ad6 | ||
![]() |
aaa48599ad | ||
![]() |
c76574b2cd | ||
![]() |
989fb42d20 | ||
![]() |
710c852f05 | ||
![]() |
39f089d3dc | ||
![]() |
ec4f6028a2 | ||
![]() |
f8907677f6 | ||
![]() |
7544cb956c | ||
![]() |
e7dbd27636 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -11,7 +11,6 @@ autom4te.cache
|
||||
Makefile
|
||||
Makefile.in
|
||||
INSTALL
|
||||
configure
|
||||
configure.lineno
|
||||
depcomp
|
||||
missing
|
||||
|
12
AUTHORS
12
AUTHORS
@@ -16,4 +16,16 @@ LucasJones
|
||||
|
||||
tpruvot@github
|
||||
|
||||
elmad
|
||||
|
||||
djm34
|
||||
|
||||
palmd
|
||||
|
||||
ig0tik3d
|
||||
|
||||
Wolf0
|
||||
|
||||
Optiminer
|
||||
|
||||
Jay D Dee
|
||||
|
34
Dockerfile
34
Dockerfile
@@ -5,19 +5,31 @@
|
||||
# ex: docker run -it --rm cpuminer-opt:latest -a cryptonight -o cryptonight.eu.nicehash.com:3355 -u 1MiningDW2GKzf4VQfmp4q2XoUvR6iy6PD.worker1 -p x -t 3
|
||||
#
|
||||
|
||||
FROM ubuntu:16.04
|
||||
RUN BUILD_DEPS="build-essential \
|
||||
libssl-dev \
|
||||
libgmp-dev \
|
||||
libcurl4-openssl-dev \
|
||||
libjansson-dev \
|
||||
automake" && \
|
||||
# Build
|
||||
FROM ubuntu:16.04 as builder
|
||||
|
||||
apt-get update && \
|
||||
apt-get install -y ${BUILD_DEPS}
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y \
|
||||
build-essential \
|
||||
libssl-dev \
|
||||
libgmp-dev \
|
||||
libcurl4-openssl-dev \
|
||||
libjansson-dev \
|
||||
automake \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY . /app/
|
||||
RUN cd /app/ && ./build.sh
|
||||
RUN cd /app/ && ./build.sh
|
||||
|
||||
ENTRYPOINT ["/app/cpuminer"]
|
||||
# App
|
||||
FROM ubuntu:16.04
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y \
|
||||
libcurl3 \
|
||||
libjansson4 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /app/cpuminer .
|
||||
ENTRYPOINT ["./cpuminer"]
|
||||
CMD ["-h"]
|
||||
|
202
Makefile.am
202
Makefile.am
@@ -22,30 +22,6 @@ cpuminer_SOURCES = \
|
||||
api.c \
|
||||
sysinfos.c \
|
||||
algo-gate-api.c\
|
||||
algo/groestl/sph_groestl.c \
|
||||
algo/skein/sph_skein.c \
|
||||
algo/bmw/sph_bmw.c \
|
||||
algo/shavite/sph_shavite.c \
|
||||
algo/shavite/shavite.c \
|
||||
algo/echo/sph_echo.c \
|
||||
algo/blake/sph_blake.c \
|
||||
algo/blake/sph_blake2b.c \
|
||||
algo/heavy/sph_hefty1.c \
|
||||
algo/blake/mod_blakecoin.c \
|
||||
algo/luffa/sph_luffa.c \
|
||||
algo/cubehash/sph_cubehash.c \
|
||||
algo/simd/sph_simd.c \
|
||||
algo/hamsi/sph_hamsi.c \
|
||||
algo/fugue/sph_fugue.c \
|
||||
algo/gost/sph_gost.c \
|
||||
algo/jh/sph_jh.c \
|
||||
algo/keccak/sph_keccak.c \
|
||||
algo/keccak/keccak.c\
|
||||
algo/sha/sph_sha2.c \
|
||||
algo/sha/sph_sha2big.c \
|
||||
algo/shabal/sph_shabal.c \
|
||||
algo/whirlpool/sph_whirlpool.c\
|
||||
crypto/blake2s.c \
|
||||
crypto/oaes_lib.c \
|
||||
crypto/c_keccak.c \
|
||||
crypto/c_groestl.c \
|
||||
@@ -61,82 +37,212 @@ cpuminer_SOURCES = \
|
||||
algo/argon2/ar2/cores.c \
|
||||
algo/argon2/ar2/ar2-scrypt-jane.c \
|
||||
algo/argon2/ar2/blake2b.c \
|
||||
algo/axiom.c \
|
||||
algo/blake/sph_blake.c \
|
||||
algo/blake/blake-hash-4way.c \
|
||||
algo/blake/blake-gate.c \
|
||||
algo/blake/blake.c \
|
||||
algo/blake/blake-4way.c \
|
||||
algo/blake/sph_blake2b.c \
|
||||
algo/blake/blake2b.c \
|
||||
algo/blake/sph-blake2s.c \
|
||||
algo/blake/blake2s-hash-4way.c \
|
||||
algo/blake/blake2s.c \
|
||||
algo/blake/blake2s-gate.c \
|
||||
algo/blake/blake2s-4way.c \
|
||||
algo/blake/blakecoin-gate.c \
|
||||
algo/blake/mod_blakecoin.c \
|
||||
algo/blake/blakecoin.c \
|
||||
algo/blake/blakecoin-4way.c \
|
||||
algo/blake/decred-gate.c \
|
||||
algo/blake/decred.c \
|
||||
algo/blake/decred-4way.c \
|
||||
algo/blake/pentablake-gate.c \
|
||||
algo/blake/pentablake-4way.c \
|
||||
algo/blake/pentablake.c \
|
||||
algo/bmw/sph_bmw.c \
|
||||
algo/bmw/bmw-hash-4way.c \
|
||||
algo/bmw/bmw256.c \
|
||||
algo/cubehash/sse2/cubehash_sse2.c\
|
||||
algo/cryptonight/cryptolight.c \
|
||||
algo/cryptonight/cryptonight-common.c\
|
||||
algo/cryptonight/cryptonight-aesni.c\
|
||||
algo/cryptonight/cryptonight.c\
|
||||
algo/drop.c \
|
||||
algo/cubehash/sph_cubehash.c \
|
||||
algo/cubehash/sse2/cubehash_sse2.c\
|
||||
algo/echo/sph_echo.c \
|
||||
algo/echo/aes_ni/hash.c\
|
||||
algo/fresh.c \
|
||||
algo/gost/sph_gost.c \
|
||||
algo/groestl/sph_groestl.c \
|
||||
algo/groestl/groestl.c \
|
||||
algo/groestl/myrgr-gate.c \
|
||||
algo/groestl/myrgr-4way.c \
|
||||
algo/groestl/myr-groestl.c \
|
||||
algo/groestl/aes_ni/hash-groestl.c \
|
||||
algo/groestl/aes_ni/hash-groestl256.c \
|
||||
algo/haval/haval.c\
|
||||
algo/fugue/sph_fugue.c \
|
||||
algo/hamsi/sph_hamsi.c \
|
||||
algo/hamsi/hamsi-hash-4way.c \
|
||||
algo/haval/haval.c \
|
||||
algo/haval/haval-hash-4way.c \
|
||||
algo/heavy/sph_hefty1.c \
|
||||
algo/heavy/heavy.c \
|
||||
algo/heavy/bastion.c \
|
||||
algo/hmq1725.c \
|
||||
algo/hodl/aes.c \
|
||||
algo/hodl/hodl-gate.c \
|
||||
algo/hodl/hodl-wolf.c \
|
||||
algo/hodl/sha512_avx.c \
|
||||
algo/hodl/sha512_avx2.c \
|
||||
algo/lbry.c \
|
||||
algo/jh/sph_jh.c \
|
||||
algo/jh/jh-hash-4way.c \
|
||||
algo/jh/jha-gate.c \
|
||||
algo/jh/jha-4way.c \
|
||||
algo/jh/jha.c \
|
||||
algo/keccak/sph_keccak.c \
|
||||
algo/keccak/keccak.c\
|
||||
algo/keccak/keccak-hash-4way.c \
|
||||
algo/keccak/keccak-4way.c\
|
||||
algo/keccak/keccak-gate.c \
|
||||
algo/keccak/sse2/keccak.c \
|
||||
algo/luffa/sph_luffa.c \
|
||||
algo/luffa/luffa.c \
|
||||
algo/luffa/sse2/luffa_for_sse2.c \
|
||||
algo/luffa/luffa_for_sse2.c \
|
||||
algo/luffa/luffa-hash-2way.c \
|
||||
algo/lyra2/lyra2.c \
|
||||
algo/lyra2/sponge.c \
|
||||
algo/lyra2/lyra2rev2-gate.c \
|
||||
algo/lyra2/lyra2rev2.c \
|
||||
algo/lyra2/lyra2rev2-4way.c \
|
||||
algo/lyra2/lyra2re.c \
|
||||
algo/lyra2/zcoin.c \
|
||||
algo/lyra2/lyra2z-gate.c \
|
||||
algo/lyra2/lyra2z.c \
|
||||
algo/lyra2/lyra2z-4way.c \
|
||||
algo/lyra2/lyra2z330.c \
|
||||
algo/keccak/sse2/keccak.c \
|
||||
algo/lyra2/lyra2h-gate.c \
|
||||
algo/lyra2/lyra2h.c \
|
||||
algo/lyra2/lyra2h-4way.c \
|
||||
algo/lyra2/allium-gate.c \
|
||||
algo/lyra2/allium-4way.c \
|
||||
algo/lyra2/allium.c \
|
||||
algo/m7m.c \
|
||||
algo/neoscrypt.c \
|
||||
algo/nist5.c \
|
||||
algo/neoscrypt/neoscrypt.c \
|
||||
algo/nist5/nist5-gate.c \
|
||||
algo/nist5/nist5-4way.c \
|
||||
algo/nist5/nist5.c \
|
||||
algo/nist5/zr5.c \
|
||||
algo/pluck.c \
|
||||
algo/quark/quark-gate.c \
|
||||
algo/quark/quark.c \
|
||||
algo/quark/quark-4way.c \
|
||||
algo/quark/anime-gate.c \
|
||||
algo/quark/anime.c \
|
||||
algo/quark/anime-4way.c \
|
||||
algo/qubit/qubit-gate.c \
|
||||
algo/qubit/qubit.c \
|
||||
algo/qubit/qubit-2way.c \
|
||||
algo/qubit/deep-gate.c \
|
||||
algo/qubit/deep-2way.c \
|
||||
algo/qubit/deep.c \
|
||||
algo/ripemd/sph_ripemd.c \
|
||||
algo/ripemd/ripemd-hash-4way.c \
|
||||
algo/ripemd/lbry-gate.c \
|
||||
algo/ripemd/lbry.c \
|
||||
algo/ripemd/lbry-4way.c \
|
||||
algo/scrypt.c \
|
||||
algo/scryptjane/scrypt-jane.c \
|
||||
algo/sha/sph_sha2.c \
|
||||
algo/sha/sph_sha2big.c \
|
||||
algo/sha/sha2-hash-4way.c \
|
||||
algo/sha/sha2.c \
|
||||
algo/sha/sha256t.c \
|
||||
algo/simd/sse2/nist.c \
|
||||
algo/simd/sse2/vector.c \
|
||||
algo/shabal/sph_shabal.c \
|
||||
algo/shabal/shabal-hash-4way.c \
|
||||
algo/shavite/sph_shavite.c \
|
||||
algo/shavite/sph-shavite-aesni.c \
|
||||
algo/shavite/shavite.c \
|
||||
algo/simd/sph_simd.c \
|
||||
algo/simd/nist.c \
|
||||
algo/simd/vector.c \
|
||||
algo/simd/simd-hash-2way.c \
|
||||
algo/skein/sph_skein.c \
|
||||
algo/skein/skein-hash-4way.c \
|
||||
algo/skein/skein.c \
|
||||
algo/skein/skein-4way.c \
|
||||
algo/skein/skein-gate.c \
|
||||
algo/skein/skein2.c \
|
||||
algo/s3.c \
|
||||
algo/skein/skein2-4way.c \
|
||||
algo/skein/skein2-gate.c \
|
||||
algo/sm3/sm3.c \
|
||||
algo/sm3/sm3-hash-4way.c \
|
||||
algo/tiger/sph_tiger.c \
|
||||
algo/timetravel.c \
|
||||
algo/veltor.c \
|
||||
algo/whirlpool/sph_whirlpool.c \
|
||||
algo/whirlpool/whirlpool-hash-4way.c \
|
||||
algo/whirlpool/whirlpool-gate.c \
|
||||
algo/whirlpool/whirlpool-4way.c \
|
||||
algo/whirlpool/whirlpool.c \
|
||||
algo/whirlpool/whirlpoolx.c \
|
||||
algo/x11/x11-gate.c \
|
||||
algo/x11/x11.c \
|
||||
algo/x11/x11evo.c \
|
||||
algo/x11/x11-4way.c \
|
||||
algo/x11/x11gost-gate.c \
|
||||
algo/x11/x11gost.c \
|
||||
algo/x11/x11gost-4way.c \
|
||||
algo/x11/c11-gate.c \
|
||||
algo/x11/c11.c \
|
||||
algo/x11/c11-4way.c \
|
||||
algo/x11/tribus-gate.c \
|
||||
algo/x11/tribus.c \
|
||||
algo/x11/tribus-4way.c \
|
||||
algo/x11/timetravel-gate.c \
|
||||
algo/x11/timetravel.c \
|
||||
algo/x11/timetravel-4way.c \
|
||||
algo/x11/timetravel10-gate.c \
|
||||
algo/x11/timetravel10.c \
|
||||
algo/x11/timetravel10-4way.c \
|
||||
algo/x11/fresh.c \
|
||||
algo/x11/x11evo.c \
|
||||
algo/x11/x11evo-4way.c \
|
||||
algo/x11/x11evo-gate.c \
|
||||
algo/x12/x12-gate.c \
|
||||
algo/x12/x12.c \
|
||||
algo/x12/x12-4way.c \
|
||||
algo/x13/x13-gate.c \
|
||||
algo/x13/x13.c \
|
||||
algo/x13/x13-4way.c \
|
||||
algo/x13/x13sm3-gate.c \
|
||||
algo/x13/x13sm3.c \
|
||||
algo/x13/x13sm3-4way.c \
|
||||
algo/x13/phi1612-gate.c \
|
||||
algo/x13/phi1612.c \
|
||||
algo/x13/phi1612-4way.c \
|
||||
algo/x13/skunk-gate.c \
|
||||
algo/x13/skunk-4way.c \
|
||||
algo/x13/skunk.c \
|
||||
algo/x13/drop.c \
|
||||
algo/x14/x14-gate.c \
|
||||
algo/x14/x14.c \
|
||||
algo/x14/x14-4way.c \
|
||||
algo/x14/veltor-gate.c \
|
||||
algo/x14/veltor.c \
|
||||
algo/x14/veltor-4way.c \
|
||||
algo/x14/polytimos-gate.c \
|
||||
algo/x14/polytimos.c \
|
||||
algo/x14/polytimos-4way.c \
|
||||
algo/x14/axiom.c \
|
||||
algo/x15/x15-gate.c \
|
||||
algo/x15/x15.c \
|
||||
algo/x15/x15-4way.c \
|
||||
algo/x17/x17-gate.c \
|
||||
algo/x17/x17.c \
|
||||
algo/xevan.c \
|
||||
algo/x17/x17-4way.c \
|
||||
algo/x17/xevan-gate.c \
|
||||
algo/x17/xevan.c \
|
||||
algo/x17/xevan-4way.c \
|
||||
algo/x17/x16r-gate.c \
|
||||
algo/x17/x16r.c \
|
||||
algo/x17/x16r-4way.c \
|
||||
algo/x17/hmq1725.c \
|
||||
algo/yescrypt/yescrypt.c \
|
||||
algo/yescrypt/yescrypt-common.c \
|
||||
algo/yescrypt/sha256_Y.c\
|
||||
algo/yescrypt/yescrypt-simd.c\
|
||||
algo/zr5.c
|
||||
|
||||
algo/yescrypt/sha256_Y.c \
|
||||
algo/yescrypt/yescrypt-simd.c
|
||||
|
||||
disable_flags =
|
||||
|
||||
|
156
README.md
156
README.md
@@ -13,66 +13,6 @@ mailto://jayddee246@gmail.com
|
||||
|
||||
See file RELEASE_NOTES for change log and compile instructions.
|
||||
|
||||
Supported Algorithms
|
||||
--------------------
|
||||
|
||||
argon2
|
||||
axiom Shabal-256 MemoHash
|
||||
bastion
|
||||
blake Blake-256 (SFR)
|
||||
blakecoin blake256r8
|
||||
blake2s Blake-2 S
|
||||
bmw BMW 256
|
||||
c11 Chaincoin
|
||||
cryptolight Cryptonight-light
|
||||
cryptonight cryptonote, Monero (XMR)
|
||||
decred
|
||||
deep Deepcoin (DCN)
|
||||
drop Dropcoin
|
||||
fresh Fresh
|
||||
groestl dmd-gr, Groestl coin
|
||||
heavy Heavy
|
||||
hmq1725 Espers
|
||||
hodl Hodlcoin
|
||||
keccak Keccak
|
||||
lbry LBC, LBRY Credits
|
||||
luffa Luffa
|
||||
lyra2re lyra2
|
||||
lyra2rev2 lyrav2, Vertcoin
|
||||
lyra2z Zcoin (XZC)
|
||||
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
|
||||
m7m Magi (XMG)
|
||||
myr-gr Myriad-Groestl
|
||||
neoscrypt NeoScrypt(128, 2, 1)
|
||||
nist5 Nist5
|
||||
pluck Pluck:128 (Supcoin)
|
||||
pentablake Pentablake
|
||||
quark Quark
|
||||
qubit Qubit
|
||||
scrypt scrypt(1024, 1, 1) (default)
|
||||
scrypt:N scrypt(N, 1, 1)
|
||||
scryptjane:nf
|
||||
sha256d Double SHA-256
|
||||
sha256t Triple SHA-256, Onecoin (OC)
|
||||
shavite3 Shavite3
|
||||
skein Skein+Sha (Skeincoin)
|
||||
skein2 Double Skein (Woodcoin)
|
||||
timetravel Machinecoin (MAC)
|
||||
vanilla blake256r8vnl (VCash)
|
||||
veltor
|
||||
whirlpool
|
||||
whirlpoolx
|
||||
x11 Dash
|
||||
x11evo Revolvercoin
|
||||
x11gost sib (SibCoin)
|
||||
x13 X13
|
||||
x14 X14
|
||||
x15 X15
|
||||
x17
|
||||
xevan Bitsend
|
||||
yescrypt
|
||||
zr5 Ziftr
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
@@ -85,13 +25,92 @@ algoritms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
|
||||
Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
|
||||
performance.
|
||||
|
||||
ARM CPUs are not supported.
|
||||
|
||||
2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
|
||||
Centos are known to work and have all dependencies in their repositories.
|
||||
Others may work but may require more effort.
|
||||
64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
|
||||
|
||||
3. Stratum pool, cpuminer-opt only supports stratum minning. Some algos
|
||||
may work wallet mining but there are no guarantees.
|
||||
MacOS, OSx is not supported.
|
||||
|
||||
3. Stratum pool. Some algos may work wallet mining using getwork or GBT. YMMV.
|
||||
|
||||
Supported Algorithms
|
||||
--------------------
|
||||
|
||||
allium Garlicoin
|
||||
anime Animecoin
|
||||
argon2
|
||||
axiom Shabal-256 MemoHash
|
||||
bastion
|
||||
blake Blake-256 (SFR)
|
||||
blakecoin blake256r8
|
||||
blake2s Blake-2 S
|
||||
bmw BMW 256
|
||||
c11 Chaincoin
|
||||
cryptolight Cryptonight-light
|
||||
cryptonight cryptonote, Monero (XMR)
|
||||
decred
|
||||
deep Deepcoin (DCN)
|
||||
dmd-gr Diamond-Groestl
|
||||
drop Dropcoin
|
||||
fresh Fresh
|
||||
groestl Groestl coin
|
||||
heavy Heavy
|
||||
hmq1725 Espers
|
||||
hodl Hodlcoin
|
||||
jha Jackpotcoin
|
||||
keccak Maxcoin
|
||||
keccakc Creative coin
|
||||
lbry LBC, LBRY Credits
|
||||
luffa Luffa
|
||||
lyra2h Hppcoin
|
||||
lyra2re lyra2
|
||||
lyra2rev2 lyra2v2, Vertcoin
|
||||
lyra2z Zcoin (XZC)
|
||||
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
|
||||
m7m Magi (XMG)
|
||||
myr-gr Myriad-Groestl
|
||||
neoscrypt NeoScrypt(128, 2, 1)
|
||||
nist5 Nist5
|
||||
pentablake Pentablake
|
||||
phi1612 phi, LUX coin
|
||||
pluck Pluck:128 (Supcoin)
|
||||
polytimos Ninja
|
||||
quark Quark
|
||||
qubit Qubit
|
||||
scrypt scrypt(1024, 1, 1) (default)
|
||||
scrypt:N scrypt(N, 1, 1)
|
||||
scryptjane:nf
|
||||
sha256d Double SHA-256
|
||||
sha256t Triple SHA-256, Onecoin (OC)
|
||||
shavite3 Shavite3
|
||||
skein Skein+Sha (Skeincoin)
|
||||
skein2 Double Skein (Woodcoin)
|
||||
skunk Signatum (SIGT)
|
||||
timetravel Machinecoin (MAC)
|
||||
timetravel10 Bitcore
|
||||
tribus Denarius (DNR)
|
||||
vanilla blake256r8vnl (VCash)
|
||||
veltor (VLT)
|
||||
whirlpool
|
||||
whirlpoolx
|
||||
x11 Dash
|
||||
x11evo Revolvercoin
|
||||
x11gost sib (SibCoin)
|
||||
x12 Galaxie Cash (GCH)
|
||||
x13 X13
|
||||
x13sm3 hsr (Hshare)
|
||||
x14 X14
|
||||
x15 X15
|
||||
x16r Ravencoin
|
||||
x17
|
||||
xevan Bitsend
|
||||
yescrypt Globalboost-Y (BSTY)
|
||||
yescryptr8 BitZeny (ZNY)
|
||||
yescryptr16 Yenten (YTN)
|
||||
zr5 Ziftr
|
||||
|
||||
Errata
|
||||
------
|
||||
@@ -114,13 +133,20 @@ forum at:
|
||||
|
||||
https://bitcointalk.org/index.php?topic=1326803.0
|
||||
|
||||
All problem reports must be accompanied by a proper definition.
|
||||
This should include how the problem occurred, the command line and
|
||||
output from the miner showing the startup and any errors.
|
||||
|
||||
Donations
|
||||
---------
|
||||
|
||||
I do not do this for money but I have a donation address if users
|
||||
are so inclined.
|
||||
cpuminer-opt has no fees of any kind but donations are accepted.
|
||||
|
||||
bitcoin:12tdvfF7KmAsihBXQXynT6E6th2c2pByTT?label=donations
|
||||
BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
|
||||
ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
|
||||
LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
|
||||
BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
|
||||
BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ
|
||||
|
||||
Happy mining!
|
||||
|
||||
|
30
README.txt
30
README.txt
@@ -1,6 +1,9 @@
|
||||
This file is included in the Windows binary package. Compile instructions
|
||||
for Linux and Windows can be found in RELEASE_NOTES.
|
||||
|
||||
cpuminer is a console program that is executed from a DOS command prompt.
|
||||
There is no GUI and no mouse support.
|
||||
|
||||
Choose the exe that best matches you CPU's features or use trial and
|
||||
error to find the fastest one that doesn't crash. Pay attention to
|
||||
the features listed at cpuminer startup to ensure you are mining at
|
||||
@@ -8,15 +11,26 @@ optimum speed using all the available features.
|
||||
|
||||
Architecture names and compile options used are only provided for Intel
|
||||
Core series. Pentium and Celeron often have fewer features.
|
||||
AMD is YMMV, see previous paragraph.
|
||||
|
||||
Exe name Compile opts Arch name
|
||||
|
||||
cpuminer-sse2.exe -march=core2, Core2
|
||||
cpuminer-sse42.exe -march=corei7, Nehalem
|
||||
cpuminer-aes-sse42.exe -maes -msse4.2 Westmere
|
||||
cpuminer-aes-avx.exe -march=corei7-avx, Sandybridge, Ivybridge
|
||||
cpuminer-aes-avx2.exe -march=core-avx2, Haswell, Broadwell, Skylake, Kabylake
|
||||
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
||||
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
||||
these CPUs. Some algos may crash the miner with an invalid instruction.
|
||||
Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
||||
|
||||
Exe name Compile flags Arch name
|
||||
|
||||
cpuminer-sse2.exe "-march=core2" Core2, Nehalem
|
||||
cpuminer-aes-sse42.exe "-maes -msse4.2" Westmere
|
||||
cpuminer-aes-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
|
||||
cpuminer-avx2.exe "-march=core-avx2" Haswell...
|
||||
cpuminer-avx2-sha.exe "-march=core-avx2 -msha" Ryzen
|
||||
|
||||
If you like this software feel free to donate:
|
||||
|
||||
BTC: 12tdvfF7KmAsihBXQXynT6E6th2c2pByTT
|
||||
ETH: 0x72122edabcae9d3f57eab0729305a425f6fef6d0
|
||||
LTC: LdUwoHJnux9r9EKqFWNvAi45kQompHk6e8
|
||||
BCH: 1QKYkB6atn4P7RFozyziAXLEnurwnUM1cQ
|
||||
BTG: GVUyECtRHeC5D58z9F3nGGfVQndwnsPnHQ
|
||||
|
||||
|
||||
|
233
RELEASE_NOTES
233
RELEASE_NOTES
@@ -6,9 +6,31 @@ compile flag.
|
||||
HW SHA support is only available when compiled from source, Windows binaries
|
||||
are not yet available.
|
||||
|
||||
cpuminer-opt is a console program, if you're using a mouse you're doing it
|
||||
wrong.
|
||||
|
||||
Security warning
|
||||
----------------
|
||||
|
||||
Miner programs are often flagged as malware by antivirus programs. This is
|
||||
a false positive, they are flagged simply because they are miners. The source
|
||||
code is open for anyone to inspect. If you don't trust the software, don't use
|
||||
it.
|
||||
|
||||
The cryptographic code has been taken from trusted sources but has been
|
||||
modified for speed at the expense of accepted security practices. This
|
||||
code should not be imported into applications where secure cryptography is
|
||||
required.
|
||||
|
||||
Compile Instructions
|
||||
--------------------
|
||||
|
||||
Requirements:
|
||||
|
||||
Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
|
||||
supported.
|
||||
64 bit Linux or Windows operating system. Apple is not supported.
|
||||
|
||||
Building on linux prerequisites:
|
||||
|
||||
It is assumed users know how to install packages on their system and
|
||||
@@ -25,14 +47,11 @@ are some of the ones that may not be in the default install and need to
|
||||
be installed manually. There may be others, read the error messages they
|
||||
will give a clue as to the missing package.
|
||||
|
||||
The folliwing command should install everything you need on Debian based
|
||||
packages:
|
||||
The following command should install everything you need on Debian based
|
||||
distributions such as Ubuntu:
|
||||
|
||||
sudo apt-get install build-essential libssl-dev libcurl4-openssl-dev libjansson-dev libgmp-dev automake
|
||||
|
||||
Building on Linux, see below for Windows.
|
||||
|
||||
Dependencies
|
||||
|
||||
build-essential (for Ubuntu, Development Tools package group on Fedora)
|
||||
automake
|
||||
@@ -44,9 +63,16 @@ pthreads
|
||||
zlib
|
||||
|
||||
SHA support on AMD Ryzen CPUs requires gcc version 5 or higher and openssl 1.1
|
||||
or higher. Additional compile options may also be required such as
|
||||
or higher. Reports of improved performiance on Ryzen when using openssl 1.0.2
|
||||
have been due to AVX and AVX2 optimizations added to that version.
|
||||
Additional improvements are expected on Ryzen with openssl 1.1.
|
||||
"-march-znver1" or "-msha".
|
||||
|
||||
Additional instructions for static compilalation can be found here:
|
||||
https://lxadm.com/Static_compilation_of_cpuminer
|
||||
Static builds should only considered in a homogeneous HW and SW environment.
|
||||
Local builds will always have the best performance and compatibility.
|
||||
|
||||
Extract cpuminer source.
|
||||
|
||||
tar xvzf cpuminer-opt-x.y.z.tar.gz
|
||||
@@ -58,9 +84,22 @@ Run ./build.sh to build on Linux or execute the following commands.
|
||||
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
|
||||
make
|
||||
|
||||
Additional optional compile flags, add the following to CFLAGS to activate:
|
||||
|
||||
-DUSE_SPH_SHA
|
||||
|
||||
SPH may give slightly better performance on algos that use sha256 when using
|
||||
openssl 1.0.1 or older. Openssl 1.0.2 adds AVX2 and 1.1 adds SHA and perform
|
||||
better than SPH.
|
||||
|
||||
Start mining.
|
||||
|
||||
./cpuminer -a algo ...
|
||||
./cpuminer -a algo -o url -u username -p password
|
||||
|
||||
Windows
|
||||
|
||||
Precompiled Windows binaries are built on a Linux host using Mingw
|
||||
with a more recent compiler than the following Windows hosted procedure.
|
||||
|
||||
Building on Windows prerequisites:
|
||||
|
||||
@@ -92,17 +131,21 @@ or similar Windows program.
|
||||
In msys shell cd to miner directory.
|
||||
cd /c/path/to/cpuminer-opt
|
||||
|
||||
Run winbuild.sh to build on Windows or execute the following commands.
|
||||
Run build.sh to build on Windows or execute the following commands.
|
||||
|
||||
./autogen.sh
|
||||
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
|
||||
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
|
||||
make
|
||||
|
||||
Start mining
|
||||
|
||||
cpuminer.exe -a algo -o url -u user -p password
|
||||
|
||||
The following tips may be useful for older AMD CPUs.
|
||||
|
||||
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
|
||||
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
|
||||
these CPUs. Some algos may crash the miner with an invalid instruction.
|
||||
AMD CPUs older than Steamroller, including Athlon x2 and Phenom II x4, are
|
||||
not supported by cpuminer-opt due to an incompatible implementation of SSE2
|
||||
on these CPUs. Some algos may crash the miner with an invalid instruction.
|
||||
Users are recommended to use an unoptimized miner such as cpuminer-multi.
|
||||
|
||||
Some users with AMD CPUs without AES_NI have reported problems compiling
|
||||
@@ -116,6 +159,172 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.8.3
|
||||
|
||||
More restoration of lost lyra2 hash.
|
||||
8 way AVX2 and 4way AVX optimization for blakecoin, vanilla & blake2s.
|
||||
8 way AVX2 for lbry.
|
||||
Scaled hashrate for API output.
|
||||
A couple of GBT fixes.
|
||||
|
||||
v3.8.2.1
|
||||
|
||||
Fixed low difficulty rejects with allium.
|
||||
Fixed qubit AVX2.
|
||||
Restored lyra2z lost hash.
|
||||
Fixed build.sh
|
||||
|
||||
v3.8.2
|
||||
|
||||
Fixed and faster myr-gr.
|
||||
Added x12 algo (Galaxie Cash), allium algo (Garlicoin).
|
||||
Faster lyra2rev2, lbry, skein.
|
||||
Large reduction in compiler warnings.
|
||||
|
||||
v3.8.1.1
|
||||
|
||||
Fixed Windows AVX2 crash.
|
||||
|
||||
v3.8.1
|
||||
|
||||
Fixes x16r on CPUs with only SSE2.
|
||||
More Optimizations for X algos, qubit & deep.
|
||||
Corrected algo optimizations for scrypt and yescrypt, no new optimizations.
|
||||
|
||||
v3.8.0.1
|
||||
|
||||
Fixed x16r AVX2 low hash rate.
|
||||
|
||||
v3.8.0
|
||||
|
||||
4way no longer a seperate feature, included in AVX2.
|
||||
Added x16r algo for Ravencoin, anime algo for Animecoin.
|
||||
More 4way optimizations for X13 and up.
|
||||
Tweaked CPU affinity to better support more than 64 CPUs.
|
||||
Fixed compile problem on some old AMD CPUs.
|
||||
|
||||
v3.7.10
|
||||
|
||||
4way optimizations for lyra2rev2, lyra2h, quark, timetravel8, timetravel10
|
||||
x11evo, blakecoin.
|
||||
Faster x13sm3 (hsr).
|
||||
Added share difficulty to accepted message.
|
||||
|
||||
v3.7.9
|
||||
|
||||
Partial 4way optimizations for veltor, skunk, polytimos, lyra2z.
|
||||
Additional 4way optimizations for X algos.
|
||||
New algo yescryptr8 for BitZeny, not to be confused with original
|
||||
yescrypt Globalboost-Y.
|
||||
|
||||
v3.7.8
|
||||
|
||||
Partial 4way optimization for most X algos including c11, xevan, phi, hsr
|
||||
|
||||
v3.7.7
|
||||
|
||||
Fixed regression caused by 64 CPU support.
|
||||
Fixed lyra2h.
|
||||
|
||||
v3.7.6
|
||||
|
||||
Added lyra2h algo for Hppcoin.
|
||||
Added support for more than 64 CPUs.
|
||||
Optimized shavite512 with AES, improves x11 etc.
|
||||
|
||||
v3.7.5
|
||||
|
||||
New algo keccakc for Creative coin with 4way optimizations
|
||||
|
||||
Rewrote some AVX/AVX2 code for more consistent implementation and some
|
||||
optimizing.
|
||||
|
||||
Enhanced capabilities check to support 4way, more precise reporting of
|
||||
features (not all algos use SSE2), and better error messages when using
|
||||
an incompatible pre-built version (Windows users).
|
||||
|
||||
v3.7.4
|
||||
|
||||
Removed unnecessary build options.
|
||||
|
||||
Added 4way support for tribus and nist5.
|
||||
|
||||
v3.7.3
|
||||
|
||||
Added polytimos algo.
|
||||
|
||||
Introducing 4-way AVX2 optimization giving up to 4x performance inprovement
|
||||
on many compute bound algos. First supported algos: skein, skein2, blake &
|
||||
keccak. This feature is only available when compiled from source. See above
|
||||
for instcuctions how to enable 4-way during compilation.
|
||||
|
||||
Updated Dockerfile.
|
||||
|
||||
v3.7.2
|
||||
|
||||
Fixed yescryptr16
|
||||
Changed default sha256 and sha512 to openssl. This should be used when
|
||||
compiling with openssl 1.0.2 or higher (Ubuntu 16.04).
|
||||
This should increase the hashrate for yescrypt, yescryptr16, m7m, xevan, skein,
|
||||
myr-gr & others when openssl 1.0.2 is installed.
|
||||
Users with openssl 1.0.1 (Ubuntu 14.04) may get better perforance by adding
|
||||
"-DUSE_SPH_SHA" to CLAGS.
|
||||
Windows binaries are compiled with -DUSE_SPH_SHA and won't get the speedup.
|
||||
|
||||
v3.7.1
|
||||
|
||||
Added yescryptr16 algo for Yenten coin
|
||||
Added SHA support to yescrypt and yescryptr16
|
||||
Small code cleanup
|
||||
|
||||
v3.7.0
|
||||
|
||||
Fixed x14 misalignment bug.
|
||||
Fixed decred stake version bug.
|
||||
Getwork fixes for algos that use big endian data encoding: m7m, zr5, neoscrypt,
|
||||
decred.
|
||||
|
||||
v3.6.10
|
||||
|
||||
Fixed misalignment bug in hsr.
|
||||
|
||||
v3.6.9
|
||||
|
||||
Added phi1612 algo for LUX coin
|
||||
Added x13sm3 algo, alias hsr, for Hshare coin
|
||||
|
||||
v3.6.8
|
||||
|
||||
Fixed timetravel10 on Windows.
|
||||
|
||||
v3.6.7
|
||||
|
||||
Skunk algo added.
|
||||
Tribus a little faster.
|
||||
Minor restructuring.
|
||||
|
||||
v3.6.6
|
||||
|
||||
added tribus algo for Denarius (DNR)
|
||||
|
||||
configure removed from .gitignore. This should allow git clone to compile
|
||||
on Windows/mingw.
|
||||
|
||||
Fixed CPU temperature monitoring on some CPUs (Linux only).
|
||||
|
||||
Fixed a compile error on FreeBSD (unsupported YMMV).
|
||||
|
||||
v3.6.5
|
||||
|
||||
Cryptonight a little faster.
|
||||
Added jha algo (Jackpotcoin) with AES optimizations.
|
||||
|
||||
v3.6.4
|
||||
|
||||
Added support for Bitcore (BTX) using the timetravel10 algo, optimized for
|
||||
AES and AVX2.
|
||||
"-a bitcore" works as an alias and is less typing that "-a timetravel10".
|
||||
|
||||
v3.6.3
|
||||
|
||||
Fixed all known issues with SHA support on AMD Ryzen CPUs, still no
|
||||
|
216
algo-gate-api.c
216
algo-gate-api.c
@@ -16,7 +16,7 @@
|
||||
#include <memory.h>
|
||||
#include <unistd.h>
|
||||
#include <openssl/sha.h>
|
||||
#include "miner.h"
|
||||
//#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
// Define null and standard functions.
|
||||
@@ -77,6 +77,12 @@ void algo_not_tested()
|
||||
applog(LOG_WARNING,"and bad things may happen. Use at your own risk.");
|
||||
}
|
||||
|
||||
void four_way_not_tested()
|
||||
{
|
||||
applog( LOG_WARNING,"Algo %s has not been tested using 4way. It may not", algo_names[opt_algo] );
|
||||
applog( LOG_WARNING,"work or may be slower. Please report your results.");
|
||||
}
|
||||
|
||||
void algo_not_implemented()
|
||||
{
|
||||
applog(LOG_ERR,"Algo %s has not been Implemented.",algo_names[opt_algo]);
|
||||
@@ -114,8 +120,8 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
|
||||
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
|
||||
gate->set_target = (void*)&std_set_target;
|
||||
gate->work_decode = (void*)&std_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_submit_getwork_result;
|
||||
gate->work_decode = (void*)&std_le_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
|
||||
gate->build_extraheader = (void*)&std_build_extraheader;
|
||||
gate->set_work_data_endian = (void*)&do_nothing;
|
||||
gate->calc_network_diff = (void*)&std_calc_network_diff;
|
||||
@@ -124,7 +130,7 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->do_this_thread = (void*)&return_true;
|
||||
gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call;
|
||||
gate->stratum_handle_response = (void*)&std_stratum_handle_response;
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->optimizations = EMPTY_SET;
|
||||
gate->ntime_index = STD_NTIME_INDEX;
|
||||
gate->nbits_index = STD_NBITS_INDEX;
|
||||
gate->nonce_index = STD_NONCE_INDEX;
|
||||
@@ -132,6 +138,10 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->work_cmp_size = STD_WORK_CMP_SIZE;
|
||||
}
|
||||
|
||||
// Ignore warnings for not yet defined register functions
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wimplicit-function-declaration"
|
||||
|
||||
// called by each thread that uses the gate
|
||||
bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
{
|
||||
@@ -145,72 +155,77 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
|
||||
switch (algo)
|
||||
{
|
||||
|
||||
// Ignore warnings for not yet defined register fucntions
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wimplicit-function-declaration"
|
||||
|
||||
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
|
||||
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
|
||||
case ALGO_C11: register_c11_algo ( gate ); break;
|
||||
case ALGO_CRYPTOLIGHT: register_cryptolight_algo( gate ); break;
|
||||
case ALGO_CRYPTONIGHT: register_cryptonight_algo( gate ); break;
|
||||
case ALGO_DECRED: register_decred_algo ( gate ); break;
|
||||
case ALGO_DEEP: register_deep_algo ( gate ); break;
|
||||
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
|
||||
case ALGO_DROP: register_drop_algo ( gate ); break;
|
||||
case ALGO_FRESH: register_fresh_algo ( gate ); break;
|
||||
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
|
||||
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
||||
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
||||
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
||||
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
||||
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z: register_zcoin_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
||||
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
||||
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
||||
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
|
||||
case ALGO_NIST5: register_nist5_algo ( gate ); break;
|
||||
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
|
||||
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
|
||||
case ALGO_QUARK: register_quark_algo ( gate ); break;
|
||||
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
|
||||
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
|
||||
case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
|
||||
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
|
||||
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
|
||||
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
|
||||
case ALGO_SKEIN: register_skein_algo ( gate ); break;
|
||||
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
|
||||
case ALGO_S3: register_s3_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break;
|
||||
case ALGO_VANILLA: register_vanilla_algo ( gate ); break;
|
||||
case ALGO_VELTOR: register_veltor_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break;
|
||||
case ALGO_X11: register_x11_algo ( gate ); break;
|
||||
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
|
||||
case ALGO_X11GOST: register_sib_algo ( gate ); break;
|
||||
case ALGO_X13: register_x13_algo ( gate ); break;
|
||||
case ALGO_X14: register_x14_algo ( gate ); break;
|
||||
case ALGO_X15: register_x15_algo ( gate ); break;
|
||||
case ALGO_X17: register_x17_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_ZR5: register_zr5_algo ( gate ); break;
|
||||
|
||||
// restore warnings
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
|
||||
case ALGO_ANIME: register_anime_algo ( gate ); break;
|
||||
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
|
||||
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
|
||||
case ALGO_C11: register_c11_algo ( gate ); break;
|
||||
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
|
||||
case ALGO_DECRED: register_decred_algo ( gate ); break;
|
||||
case ALGO_DEEP: register_deep_algo ( gate ); break;
|
||||
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
|
||||
case ALGO_DROP: register_drop_algo ( gate ); break;
|
||||
case ALGO_FRESH: register_fresh_algo ( gate ); break;
|
||||
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
|
||||
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
||||
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||
case ALGO_JHA: register_jha_algo ( gate ); break;
|
||||
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
||||
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
|
||||
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
||||
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
||||
case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break;
|
||||
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
||||
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
||||
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
||||
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
|
||||
case ALGO_NIST5: register_nist5_algo ( gate ); break;
|
||||
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
|
||||
case ALGO_PHI1612: register_phi1612_algo ( gate ); break;
|
||||
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
|
||||
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
|
||||
case ALGO_QUARK: register_quark_algo ( gate ); break;
|
||||
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
|
||||
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
|
||||
case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
|
||||
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
|
||||
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
|
||||
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
|
||||
case ALGO_SKEIN: register_skein_algo ( gate ); break;
|
||||
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
|
||||
case ALGO_SKUNK: register_skunk_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL10: register_timetravel10_algo( gate ); break;
|
||||
case ALGO_TRIBUS: register_tribus_algo ( gate ); break;
|
||||
case ALGO_VANILLA: register_vanilla_algo ( gate ); break;
|
||||
case ALGO_VELTOR: register_veltor_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break;
|
||||
case ALGO_X11: register_x11_algo ( gate ); break;
|
||||
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
|
||||
case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
|
||||
case ALGO_X12: register_x12_algo ( gate ); break;
|
||||
case ALGO_X13: register_x13_algo ( gate ); break;
|
||||
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
|
||||
case ALGO_X14: register_x14_algo ( gate ); break;
|
||||
case ALGO_X15: register_x15_algo ( gate ); break;
|
||||
case ALGO_X16R: register_x16r_algo ( gate ); break;
|
||||
case ALGO_X17: register_x17_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
|
||||
case ALGO_ZR5: register_zr5_algo ( gate ); break;
|
||||
default:
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
|
||||
return false;
|
||||
@@ -225,6 +240,9 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
return true;
|
||||
}
|
||||
|
||||
// restore warnings
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
// override std defaults with jr2 defaults
|
||||
bool register_json_rpc2( algo_gate_t *gate )
|
||||
{
|
||||
@@ -253,42 +271,48 @@ void exec_hash_function( int algo, void *output, const void *pdata )
|
||||
gate.hash( output, pdata, 0 );
|
||||
}
|
||||
|
||||
// an algo can have multiple aliases but the aliases must be unique
|
||||
|
||||
#define PROPER (1)
|
||||
#define ALIAS (0)
|
||||
|
||||
// The only difference between the alias and the proper algo name is the
|
||||
// proper name is the one that is defined in ALGO_NAMES, there may be
|
||||
// proper name is the one that is defined in ALGO_NAMES. There may be
|
||||
// multiple aliases that map to the same proper name.
|
||||
// New aliases can be added anywhere in the array as long as NULL is last.
|
||||
// Alphabetic order of alias is recommended.
|
||||
const char* const algo_alias_map[][2] =
|
||||
{
|
||||
// alias proper
|
||||
{ "blake256r8", "blakecoin" },
|
||||
{ "blake256r8vnl", "vanilla" },
|
||||
{ "sia", "blake2b" },
|
||||
{ "blake256r14", "blake" },
|
||||
{ "blake256r14dcr", "decred" },
|
||||
{ "cryptonote", "cryptonight" },
|
||||
{ "cryptonight-light", "cryptolight" },
|
||||
{ "diamond", "dmd-gr" },
|
||||
{ "droplp", "drop" },
|
||||
{ "espers", "hmq1725" },
|
||||
{ "flax", "c11" },
|
||||
{ "jane", "scryptjane" },
|
||||
{ "lyra2", "lyra2re" },
|
||||
{ "lyra2v2", "lyra2rev2" },
|
||||
{ "lyra2zoin", "lyra2z330" },
|
||||
{ "myriad", "myr-gr" },
|
||||
{ "neo", "neoscrypt" },
|
||||
{ "sib", "x11gost" },
|
||||
{ "yes", "yescrypt" },
|
||||
{ "ziftr", "zr5" },
|
||||
{ "zcoin", "lyra2z" },
|
||||
{ "zoin", "lyra2z330" },
|
||||
{ NULL, NULL }
|
||||
{ "bitcore", "timetravel10" },
|
||||
{ "bitzeny", "yescryptr8" },
|
||||
{ "blake256r8", "blakecoin" },
|
||||
{ "blake256r8vnl", "vanilla" },
|
||||
{ "blake256r14", "blake" },
|
||||
{ "blake256r14dcr", "decred" },
|
||||
{ "cryptonote", "cryptonight" },
|
||||
{ "cryptonight-light", "cryptolight" },
|
||||
{ "diamond", "dmd-gr" },
|
||||
{ "droplp", "drop" },
|
||||
{ "espers", "hmq1725" },
|
||||
{ "flax", "c11" },
|
||||
{ "hsr", "x13sm3" },
|
||||
{ "jackpot", "jha" },
|
||||
{ "jane", "scryptjane" },
|
||||
{ "lyra2", "lyra2re" },
|
||||
{ "lyra2v2", "lyra2rev2" },
|
||||
{ "lyra2zoin", "lyra2z330" },
|
||||
{ "myrgr", "myr-gr" },
|
||||
{ "myriad", "myr-gr" },
|
||||
{ "neo", "neoscrypt" },
|
||||
{ "phi", "phi1612" },
|
||||
// { "sia", "blake2b" },
|
||||
{ "sib", "x11gost" },
|
||||
{ "timetravel8", "timetravel" },
|
||||
{ "ziftr", "zr5" },
|
||||
{ "yenten", "yescryptr16" },
|
||||
{ "yescryptr8k", "yescrypt" },
|
||||
{ "zcoin", "lyra2z" },
|
||||
{ "zoin", "lyra2z330" },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
|
||||
// if arg is a valid alias for a known algo it is updated with the proper name.
|
||||
|
@@ -1,7 +1,6 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
/////////////////////////////
|
||||
@@ -85,12 +84,13 @@
|
||||
|
||||
typedef uint32_t set_t;
|
||||
|
||||
#define EMPTY_SET 0
|
||||
#define SSE2_OPT 1
|
||||
#define AES_OPT 2
|
||||
#define AVX_OPT 4
|
||||
#define AVX2_OPT 8
|
||||
#define SHA_OPT 16
|
||||
#define EMPTY_SET 0
|
||||
#define SSE2_OPT 1
|
||||
#define AES_OPT 2
|
||||
#define AVX_OPT 4
|
||||
#define AVX2_OPT 8
|
||||
#define SHA_OPT 0x10
|
||||
//#define FOUR_WAY_OPT 0x20
|
||||
|
||||
// return set containing all elements from sets a & b
|
||||
inline set_t set_union ( set_t a, set_t b ) { return a | b; }
|
||||
@@ -156,7 +156,7 @@ bool return_false();
|
||||
void *return_null();
|
||||
void algo_not_tested();
|
||||
void algo_not_implemented();
|
||||
|
||||
void four_way_not_tested();
|
||||
|
||||
// Warning: algo_gate.nonce_index should only be used in targetted code
|
||||
// due to different behaviours by different targets. The JR2 index uses an
|
||||
@@ -212,21 +212,24 @@ int64_t get_max64_0x3fffffLL();
|
||||
int64_t get_max64_0x1ffff();
|
||||
int64_t get_max64_0xffffLL();
|
||||
|
||||
void std_set_target ( struct work *work, double job_diff );
|
||||
void std_set_target( struct work *work, double job_diff );
|
||||
void alt_set_target( struct work* work, double job_diff );
|
||||
void scrypt_set_target( struct work *work, double job_diff );
|
||||
|
||||
bool std_work_decode( const json_t *val, struct work *work );
|
||||
bool std_le_work_decode( const json_t *val, struct work *work );
|
||||
bool std_be_work_decode( const json_t *val, struct work *work );
|
||||
bool jr2_work_decode( const json_t *val, struct work *work );
|
||||
|
||||
bool std_submit_getwork_result( CURL *curl, struct work *work );
|
||||
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
|
||||
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
|
||||
bool jr2_submit_getwork_result( CURL *curl, struct work *work );
|
||||
|
||||
void std_le_build_stratum_request( char *req, struct work *work );
|
||||
void std_be_build_stratum_request( char *req, struct work *work );
|
||||
void jr2_build_stratum_request ( char *req, struct work *work );
|
||||
|
||||
// set_work_data_endian target, default is do_nothing;
|
||||
void swab_work_data( struct work *work );
|
||||
// Default is do_nothing (assumed LE)
|
||||
void set_work_data_big_endian( struct work *work );
|
||||
|
||||
double std_calc_network_diff( struct work *work );
|
||||
|
||||
|
@@ -1,5 +1,3 @@
|
||||
#include "miner.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
143
algo/blake/blake-4way.c
Normal file
143
algo/blake/blake-4way.c
Normal file
@@ -0,0 +1,143 @@
|
||||
#include "blake-gate.h"
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
#if defined (BLAKE_4WAY)
|
||||
|
||||
blake256r14_4way_context blake_4w_ctx;
|
||||
|
||||
void blakehash_4way(void *state, const void *input)
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256r14_4way_context ctx;
|
||||
memcpy( &ctx, &blake_4w_ctx, sizeof ctx );
|
||||
blake256r14_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r14_4way_close( &ctx, vhash );
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
|
||||
if (opt_benchmark)
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
blake256r14_4way_init( &blake_4w_ctx );
|
||||
blake256r14_4way( &blake_4w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
|
||||
blakehash_4way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(BLAKE_8WAY)
|
||||
|
||||
blake256r14_8way_context blake_8w_ctx;
|
||||
|
||||
void blakehash_8way( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
blake256r14_8way_context ctx;
|
||||
memcpy( &ctx, &blake_8w_ctx, sizeof ctx );
|
||||
blake256r14_8way( &ctx, input + (64<<3), 16 );
|
||||
blake256r14_8way_close( &ctx, vhash );
|
||||
mm256_deinterleave_8x32( state, state+ 32, state+ 64, state+ 96,
|
||||
state+128, state+160, state+192, state+224,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
|
||||
if (opt_benchmark)
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
|
||||
blake256r14_8way_init( &blake_8w_ctx );
|
||||
blake256r14_8way( &blake_8w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
pdata[19] = n;
|
||||
|
||||
blakehash_8way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+i)[7] <= HTarget && fulltest( hash+i, ptarget ) )
|
||||
{
|
||||
found[i] = true;
|
||||
num_found++;
|
||||
nonces[i] = n+i;
|
||||
work_set_target_ratio( work, hash+1 );
|
||||
}
|
||||
n += 8;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
26
algo/blake/blake-gate.c
Normal file
26
algo/blake/blake-gate.c
Normal file
@@ -0,0 +1,26 @@
|
||||
#include "blake-gate.h"
|
||||
|
||||
int64_t blake_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
}
|
||||
|
||||
bool register_blake_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_max64 = (void*)&blake_get_max64;
|
||||
//#if defined (__AVX2__) && defined (FOUR_WAY)
|
||||
// gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT;
|
||||
// gate->scanhash = (void*)&scanhash_blake_8way;
|
||||
// gate->hash = (void*)&blakehash_8way;
|
||||
#if defined(BLAKE_4WAY)
|
||||
four_way_not_tested();
|
||||
gate->scanhash = (void*)&scanhash_blake_4way;
|
||||
gate->hash = (void*)&blakehash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_blake;
|
||||
gate->hash = (void*)&blakehash;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
21
algo/blake/blake-gate.h
Normal file
21
algo/blake/blake-gate.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef __BLAKE_GATE_H__
|
||||
#define __BLAKE_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define BLAKE_4WAY
|
||||
#endif
|
||||
|
||||
#if defined (BLAKE_4WAY)
|
||||
void blakehash_4way(void *state, const void *input);
|
||||
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
void blakehash( void *state, const void *input );
|
||||
int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
1510
algo/blake/blake-hash-4way.c
Normal file
1510
algo/blake/blake-hash-4way.c
Normal file
File diff suppressed because it is too large
Load Diff
143
algo/blake/blake-hash-4way.h
Normal file
143
algo/blake/blake-hash-4way.h
Normal file
@@ -0,0 +1,143 @@
|
||||
/* $Id: sph_blake.h 252 2011-06-07 17:55:14Z tp $ */
|
||||
/**
|
||||
* BLAKE interface. BLAKE is a family of functions which differ by their
|
||||
* output size; this implementation defines BLAKE for output sizes 224,
|
||||
* 256, 384 and 512 bits. This implementation conforms to the "third
|
||||
* round" specification.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_blake.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef __BLAKE_HASH_4WAY__
|
||||
#define __BLAKE_HASH_4WAY__ 1
|
||||
|
||||
#ifdef __AVX__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
#define SPH_SIZE_blake256 256
|
||||
|
||||
#define SPH_SIZE_blake512 512
|
||||
|
||||
// With AVX only Blake-256 4 way is available.
|
||||
// With AVX2 Blake-256 8way & Blake-512 4 way are also available.
|
||||
|
||||
// Blake-256 4 way
|
||||
|
||||
typedef struct {
|
||||
__m128i buf[16] __attribute__ ((aligned (64)));
|
||||
__m128i H[8];
|
||||
__m128i S[4];
|
||||
size_t ptr;
|
||||
sph_u32 T0, T1;
|
||||
int rounds; // 14 for blake, 8 for blakecoin & vanilla
|
||||
} blake_4way_small_context;
|
||||
|
||||
// Default 14 rounds
|
||||
typedef blake_4way_small_context blake256_4way_context;
|
||||
void blake256_4way_init(void *cc);
|
||||
void blake256_4way(void *cc, const void *data, size_t len);
|
||||
void blake256_4way_close(void *cc, void *dst);
|
||||
|
||||
// 14 rounds, blake, decred
|
||||
typedef blake_4way_small_context blake256r14_4way_context;
|
||||
void blake256r14_4way_init(void *cc);
|
||||
void blake256r14_4way(void *cc, const void *data, size_t len);
|
||||
void blake256r14_4way_close(void *cc, void *dst);
|
||||
|
||||
// 8 rounds, blakecoin, vanilla
|
||||
typedef blake_4way_small_context blake256r8_4way_context;
|
||||
void blake256r8_4way_init(void *cc);
|
||||
void blake256r8_4way(void *cc, const void *data, size_t len);
|
||||
void blake256r8_4way_close(void *cc, void *dst);
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
// Blake-256 8 way
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16] __attribute__ ((aligned (64)));
|
||||
__m256i H[8];
|
||||
__m256i S[4];
|
||||
size_t ptr;
|
||||
sph_u32 T0, T1;
|
||||
int rounds; // 14 for blake, 8 for blakecoin & vanilla
|
||||
} blake_8way_small_context;
|
||||
|
||||
// Default 14 rounds
|
||||
typedef blake_8way_small_context blake256_8way_context;
|
||||
void blake256_8way_init(void *cc);
|
||||
void blake256_8way(void *cc, const void *data, size_t len);
|
||||
void blake256_8way_close(void *cc, void *dst);
|
||||
|
||||
// 14 rounds, blake, decred
|
||||
typedef blake_8way_small_context blake256r14_8way_context;
|
||||
void blake256r14_8way_init(void *cc);
|
||||
void blake256r14_8way(void *cc, const void *data, size_t len);
|
||||
void blake256r14_8way_close(void *cc, void *dst);
|
||||
|
||||
// 8 rounds, blakecoin, vanilla
|
||||
typedef blake_8way_small_context blake256r8_8way_context;
|
||||
void blake256r8_8way_init(void *cc);
|
||||
void blake256r8_8way(void *cc, const void *data, size_t len);
|
||||
void blake256r8_8way_close(void *cc, void *dst);
|
||||
|
||||
// Blake-512 4 way
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16] __attribute__ ((aligned (64)));
|
||||
__m256i H[8];
|
||||
__m256i S[4];
|
||||
size_t ptr;
|
||||
sph_u64 T0, T1;
|
||||
} blake_4way_big_context;
|
||||
|
||||
typedef blake_4way_big_context blake512_4way_context;
|
||||
|
||||
void blake512_4way_init(void *cc);
|
||||
void blake512_4way(void *cc, const void *data, size_t len);
|
||||
void blake512_4way_close(void *cc, void *dst);
|
||||
void blake512_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -1,4 +1,3 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "sph_blake.h"
|
||||
|
||||
@@ -90,18 +89,3 @@ int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blake_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
}
|
||||
|
||||
bool register_blake_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_blake;
|
||||
gate->hash = (void*)&blakehash;
|
||||
gate->get_max64 = (void*)&blake_get_max64;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
@@ -3,16 +3,13 @@
|
||||
* tpruvot@github 2015-2016
|
||||
*/
|
||||
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "algo/blake/sph_blake2b.h"
|
||||
|
||||
|
||||
static __thread sph_blake2b_ctx s_midstate;
|
||||
static __thread sph_blake2b_ctx s_ctx;
|
||||
//static __thread sph_blake2b_ctx s_midstate;
|
||||
//static __thread sph_blake2b_ctx s_ctx;
|
||||
#define MIDLEN 76
|
||||
#define A 64
|
||||
|
||||
@@ -28,6 +25,7 @@ void blake2b_hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
/*
|
||||
static void blake2b_hash_end(uint32_t *output, const uint32_t *input)
|
||||
{
|
||||
s_ctx.outlen = MIDLEN;
|
||||
@@ -35,6 +33,7 @@ static void blake2b_hash_end(uint32_t *output, const uint32_t *input)
|
||||
sph_blake2b_update(&s_ctx, (uint8_t*) &input[MIDLEN/4], 80 - MIDLEN);
|
||||
sph_blake2b_final(&s_ctx, (uint8_t*) output);
|
||||
}
|
||||
*/
|
||||
|
||||
int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
@@ -220,6 +219,8 @@ bool register_blake2b_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&blake2b_hash;
|
||||
gate->calc_network_diff = (void*)&blake2b_calc_network_diff;
|
||||
gate->build_stratum_request = (void*)&blake2b_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
gate->build_extraheader = (void*)&blake2b_build_extraheader;
|
||||
gate->get_new_work = (void*)&blake2b_get_new_work;
|
||||
gate->get_max64 = (void*)&blake2b_get_max64;
|
||||
|
134
algo/blake/blake2s-4way.c
Normal file
134
algo/blake/blake2s-4way.c
Normal file
@@ -0,0 +1,134 @@
|
||||
#include "blake2s-gate.h"
|
||||
#include "blake2s-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(BLAKE2S_8WAY)
|
||||
|
||||
static __thread blake2s_8way_state blake2s_8w_ctx;
|
||||
|
||||
void blake2s_8way_hash( void *output, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
blake2s_8way_state ctx;
|
||||
memcpy( &ctx, &blake2s_8w_ctx, sizeof ctx );
|
||||
|
||||
blake2s_8way_update( &ctx, input + (64<<3), 16 );
|
||||
blake2s_8way_final( &ctx, vhash, BLAKE2S_OUTBYTES );
|
||||
|
||||
mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96,
|
||||
output+128, output+160, output+192, output+224,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
blake2s_8way_init( &blake2s_8w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_8way_update( &blake2s_8w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
pdata[19] = n;
|
||||
|
||||
blake2s_8way_hash( hash, vdata );
|
||||
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 8;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#elif defined(BLAKE2S_4WAY)
|
||||
|
||||
static __thread blake2s_4way_state blake2s_4w_ctx;
|
||||
|
||||
void blake2s_4way_hash( void *output, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake2s_4way_state ctx;
|
||||
memcpy( &ctx, &blake2s_4w_ctx, sizeof ctx );
|
||||
|
||||
blake2s_4way_update( &ctx, input + (64<<2), 16 );
|
||||
blake2s_4way_final( &ctx, vhash, BLAKE2S_OUTBYTES );
|
||||
|
||||
mm_deinterleave_4x32( output, output+32, output+64, output+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
blake2s_4way_init( &blake2s_4w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_4way_update( &blake2s_4w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
pdata[19] = n;
|
||||
|
||||
blake2s_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
27
algo/blake/blake2s-gate.c
Normal file
27
algo/blake/blake2s-gate.c
Normal file
@@ -0,0 +1,27 @@
|
||||
#include "blake2s-gate.h"
|
||||
|
||||
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blake2s_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
}
|
||||
|
||||
bool register_blake2s_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(BLAKE2S_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_blake2s_8way;
|
||||
gate->hash = (void*)&blake2s_8way_hash;
|
||||
#elif defined(BLAKE2S_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_blake2s_4way;
|
||||
gate->hash = (void*)&blake2s_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_blake2s;
|
||||
gate->hash = (void*)&blake2s_hash;
|
||||
#endif
|
||||
gate->get_max64 = (void*)&blake2s_get_max64;
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
35
algo/blake/blake2s-gate.h
Normal file
35
algo/blake/blake2s-gate.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#ifndef __BLAKE2S_GATE_H__
|
||||
#define __BLAKE2S_GATE_H__ 1
|
||||
|
||||
#include <stdint.h>
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__AVX__)
|
||||
#define BLAKE2S_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
#define BLAKE2S_8WAY
|
||||
#endif
|
||||
|
||||
bool register_blake2s_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(BLAKE2S_8WAY)
|
||||
|
||||
void blake2s_8way_hash( void *state, const void *input );
|
||||
int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#elif defined (BLAKE2S_4WAY)
|
||||
|
||||
void blake2s_4way_hash( void *state, const void *input );
|
||||
int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#else
|
||||
|
||||
void blake2s_hash( void *state, const void *input );
|
||||
int scanhash_blake2s( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
362
algo/blake/blake2s-hash-4way.c
Normal file
362
algo/blake/blake2s-hash-4way.c
Normal file
@@ -0,0 +1,362 @@
|
||||
/**
|
||||
* BLAKE2 reference source code package - reference C implementations
|
||||
*
|
||||
* Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
*
|
||||
* To the extent possible under law, the author(s) have dedicated all copyright
|
||||
* and related and neighboring rights to this software to the public domain
|
||||
* worldwide. This software is distributed without any warranty.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
* this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
|
||||
#include "blake2s-hash-4way.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(__AVX__)
|
||||
|
||||
static const uint32_t blake2s_IV[8] =
|
||||
{
|
||||
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
|
||||
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
|
||||
};
|
||||
|
||||
static const uint8_t blake2s_sigma[10][16] =
|
||||
{
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
|
||||
};
|
||||
|
||||
// define a constant for initial param.
|
||||
|
||||
int blake2s_4way_init( blake2s_4way_state *S, const uint8_t outlen )
|
||||
{
|
||||
blake2s_nway_param P[1];
|
||||
|
||||
P->digest_length = outlen;
|
||||
P->key_length = 0;
|
||||
P->fanout = 1;
|
||||
P->depth = 1;
|
||||
P->leaf_length = 0;
|
||||
*((uint64_t*)(P->node_offset)) = 0;
|
||||
P->node_depth = 0;
|
||||
P->inner_length = 0;
|
||||
memset( P->salt, 0, sizeof( P->salt ) );
|
||||
memset( P->personal, 0, sizeof( P->personal ) );
|
||||
|
||||
memset( S, 0, sizeof( blake2s_4way_state ) );
|
||||
for( int i = 0; i < 8; ++i )
|
||||
S->h[i] = _mm_set1_epi32( blake2s_IV[i] );
|
||||
|
||||
uint32_t *p = ( uint32_t * )( P );
|
||||
|
||||
/* IV XOR ParamBlock */
|
||||
for ( size_t i = 0; i < 8; ++i )
|
||||
S->h[i] = _mm_xor_si128( S->h[i], _mm_set1_epi32( p[i] ) );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2s_4way_compress( blake2s_4way_state *S, const __m128i* block )
|
||||
{
|
||||
__m128i m[16];
|
||||
__m128i v[16];
|
||||
|
||||
memcpy_128( m, block, 16 );
|
||||
memcpy_128( v, S->h, 8 );
|
||||
|
||||
v[ 8] = _mm_set1_epi32( blake2s_IV[0] );
|
||||
v[ 9] = _mm_set1_epi32( blake2s_IV[1] );
|
||||
v[10] = _mm_set1_epi32( blake2s_IV[2] );
|
||||
v[11] = _mm_set1_epi32( blake2s_IV[3] );
|
||||
v[12] = _mm_xor_si128( _mm_set1_epi32( S->t[0] ),
|
||||
_mm_set1_epi32( blake2s_IV[4] ) );
|
||||
v[13] = _mm_xor_si128( _mm_set1_epi32( S->t[1] ),
|
||||
_mm_set1_epi32( blake2s_IV[5] ) );
|
||||
v[14] = _mm_xor_si128( _mm_set1_epi32( S->f[0] ),
|
||||
_mm_set1_epi32( blake2s_IV[6] ) );
|
||||
v[15] = _mm_xor_si128( _mm_set1_epi32( S->f[1] ),
|
||||
_mm_set1_epi32( blake2s_IV[7] ) );
|
||||
|
||||
#define G4W(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+0] ] ); \
|
||||
d = mm_rotr_32( _mm_xor_si128( d, a ), 16 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_rotr_32( _mm_xor_si128( b, c ), 12 ); \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ blake2s_sigma[r][2*i+1] ] ); \
|
||||
d = mm_rotr_32( _mm_xor_si128( d, a ), 8 ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
b = mm_rotr_32( _mm_xor_si128( b, c ), 7 ); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND4W(r) \
|
||||
do { \
|
||||
G4W( r, 0, v[ 0], v[ 4], v[ 8], v[12] ); \
|
||||
G4W( r, 1, v[ 1], v[ 5], v[ 9], v[13] ); \
|
||||
G4W( r, 2, v[ 2], v[ 6], v[10], v[14] ); \
|
||||
G4W( r, 3, v[ 3], v[ 7], v[11], v[15] ); \
|
||||
G4W( r, 4, v[ 0], v[ 5], v[10], v[15] ); \
|
||||
G4W( r, 5, v[ 1], v[ 6], v[11], v[12] ); \
|
||||
G4W( r, 6, v[ 2], v[ 7], v[ 8], v[13] ); \
|
||||
G4W( r, 7, v[ 3], v[ 4], v[ 9], v[14] ); \
|
||||
} while(0)
|
||||
|
||||
ROUND4W( 0 );
|
||||
ROUND4W( 1 );
|
||||
ROUND4W( 2 );
|
||||
ROUND4W( 3 );
|
||||
ROUND4W( 4 );
|
||||
ROUND4W( 5 );
|
||||
ROUND4W( 6 );
|
||||
ROUND4W( 7 );
|
||||
ROUND4W( 8 );
|
||||
ROUND4W( 9 );
|
||||
|
||||
for( size_t i = 0; i < 8; ++i )
|
||||
S->h[i] = _mm_xor_si128( _mm_xor_si128( S->h[i], v[i] ), v[i + 8] );
|
||||
|
||||
#undef G4W
|
||||
#undef ROUND4W
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2s_4way_update( blake2s_4way_state *S, const void *in,
|
||||
uint64_t inlen )
|
||||
{
|
||||
__m128i *input = (__m128i*)in;
|
||||
__m128i *buf = (__m128i*)S->buf;
|
||||
const int bsize = BLAKE2S_BLOCKBYTES;
|
||||
|
||||
while( inlen > 0 )
|
||||
{
|
||||
size_t left = S->buflen;
|
||||
if( inlen >= bsize - left )
|
||||
{
|
||||
memcpy_128( buf + (left>>2), input, (bsize - left) >> 2 );
|
||||
S->buflen += bsize - left;
|
||||
S->t[0] += BLAKE2S_BLOCKBYTES;
|
||||
S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );
|
||||
blake2s_4way_compress( S, buf );
|
||||
S->buflen = 0;
|
||||
input += ( bsize >> 2 );
|
||||
inlen -= bsize;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy_128( buf + ( left>>2 ), input, inlen>>2 );
|
||||
S->buflen += (size_t) inlen;
|
||||
input += ( inlen>>2 );
|
||||
inlen -= inlen;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2s_4way_final( blake2s_4way_state *S, void *out, uint8_t outlen )
|
||||
{
|
||||
__m128i *buf = (__m128i*)S->buf;
|
||||
|
||||
S->t[0] += S->buflen;
|
||||
S->t[1] += ( S->t[0] < S->buflen );
|
||||
if ( S->last_node )
|
||||
S->f[1] = ~0U;
|
||||
S->f[0] = ~0U;
|
||||
|
||||
memset_zero_128( buf + ( S->buflen>>2 ),
|
||||
( BLAKE2S_BLOCKBYTES - S->buflen ) >> 2 );
|
||||
blake2s_4way_compress( S, buf );
|
||||
|
||||
for ( int i = 0; i < 8; ++i )
|
||||
casti_m128i( out, i ) = S->h[ i ];
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
int blake2s_8way_compress( blake2s_8way_state *S, const __m256i *block )
|
||||
{
|
||||
__m256i m[16];
|
||||
__m256i v[16];
|
||||
|
||||
memcpy_256( m, block, 16 );
|
||||
memcpy_256( v, S->h, 8 );
|
||||
|
||||
v[ 8] = _mm256_set1_epi32( blake2s_IV[0] );
|
||||
v[ 9] = _mm256_set1_epi32( blake2s_IV[1] );
|
||||
v[10] = _mm256_set1_epi32( blake2s_IV[2] );
|
||||
v[11] = _mm256_set1_epi32( blake2s_IV[3] );
|
||||
v[12] = _mm256_xor_si256( _mm256_set1_epi32( S->t[0] ),
|
||||
_mm256_set1_epi32( blake2s_IV[4] ) );
|
||||
v[13] = _mm256_xor_si256( _mm256_set1_epi32( S->t[1] ),
|
||||
_mm256_set1_epi32( blake2s_IV[5] ) );
|
||||
v[14] = _mm256_xor_si256( _mm256_set1_epi32( S->f[0] ),
|
||||
_mm256_set1_epi32( blake2s_IV[6] ) );
|
||||
v[15] = _mm256_xor_si256( _mm256_set1_epi32( S->f[1] ),
|
||||
_mm256_set1_epi32( blake2s_IV[7] ) );
|
||||
|
||||
#define G8W(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), \
|
||||
m[ blake2s_sigma[r][2*i+0] ] ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 16 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 12 ); \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), \
|
||||
m[ blake2s_sigma[r][2*i+1] ] ); \
|
||||
d = mm256_rotr_32( _mm256_xor_si256( d, a ), 8 ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
b = mm256_rotr_32( _mm256_xor_si256( b, c ), 7 ); \
|
||||
} while(0)
|
||||
|
||||
#define ROUND8W(r) \
|
||||
do { \
|
||||
G8W( r, 0, v[ 0], v[ 4], v[ 8], v[12] ); \
|
||||
G8W( r, 1, v[ 1], v[ 5], v[ 9], v[13] ); \
|
||||
G8W( r, 2, v[ 2], v[ 6], v[10], v[14] ); \
|
||||
G8W( r, 3, v[ 3], v[ 7], v[11], v[15] ); \
|
||||
G8W( r, 4, v[ 0], v[ 5], v[10], v[15] ); \
|
||||
G8W( r, 5, v[ 1], v[ 6], v[11], v[12] ); \
|
||||
G8W( r, 6, v[ 2], v[ 7], v[ 8], v[13] ); \
|
||||
G8W( r, 7, v[ 3], v[ 4], v[ 9], v[14] ); \
|
||||
} while(0)
|
||||
|
||||
ROUND8W( 0 );
|
||||
ROUND8W( 1 );
|
||||
ROUND8W( 2 );
|
||||
ROUND8W( 3 );
|
||||
ROUND8W( 4 );
|
||||
ROUND8W( 5 );
|
||||
ROUND8W( 6 );
|
||||
ROUND8W( 7 );
|
||||
ROUND8W( 8 );
|
||||
ROUND8W( 9 );
|
||||
|
||||
for( size_t i = 0; i < 8; ++i )
|
||||
S->h[i] = _mm256_xor_si256( _mm256_xor_si256( S->h[i], v[i] ), v[i + 8] );
|
||||
|
||||
#undef G8W
|
||||
#undef ROUND8W
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen )
|
||||
{
|
||||
blake2s_nway_param P[1];
|
||||
|
||||
P->digest_length = outlen;
|
||||
P->key_length = 0;
|
||||
P->fanout = 1;
|
||||
P->depth = 1;
|
||||
P->leaf_length = 0;
|
||||
*((uint64_t*)(P->node_offset)) = 0;
|
||||
P->node_depth = 0;
|
||||
P->inner_length = 0;
|
||||
memset( P->salt, 0, sizeof( P->salt ) );
|
||||
memset( P->personal, 0, sizeof( P->personal ) );
|
||||
|
||||
memset( S, 0, sizeof( blake2s_8way_state ) );
|
||||
for( int i = 0; i < 8; ++i )
|
||||
S->h[i] = _mm256_set1_epi32( blake2s_IV[i] );
|
||||
|
||||
uint32_t *p = ( uint32_t * )( P );
|
||||
|
||||
/* IV XOR ParamBlock */
|
||||
for ( size_t i = 0; i < 8; ++i )
|
||||
S->h[i] = _mm256_xor_si256( S->h[i], _mm256_set1_epi32( p[i] ) );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2s_8way_update( blake2s_8way_state *S, const void *in,
|
||||
uint64_t inlen )
|
||||
{
|
||||
__m256i *input = (__m256i*)in;
|
||||
__m256i *buf = (__m256i*)S->buf;
|
||||
const int bsize = BLAKE2S_BLOCKBYTES;
|
||||
|
||||
while( inlen > 0 )
|
||||
{
|
||||
size_t left = S->buflen;
|
||||
if( inlen >= bsize - left )
|
||||
{
|
||||
memcpy_256( buf + (left>>2), input, (bsize - left) >> 2 );
|
||||
S->buflen += bsize - left;
|
||||
S->t[0] += BLAKE2S_BLOCKBYTES;
|
||||
S->t[1] += ( S->t[0] < BLAKE2S_BLOCKBYTES );
|
||||
blake2s_8way_compress( S, buf );
|
||||
S->buflen = 0;
|
||||
input += ( bsize >> 2 );
|
||||
inlen -= bsize;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy_256( buf + ( left>>2 ), input, inlen>>2 );
|
||||
S->buflen += (size_t) inlen;
|
||||
input += ( inlen>>2 );
|
||||
inlen -= inlen;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen )
|
||||
{
|
||||
__m256i *buf = (__m256i*)S->buf;
|
||||
|
||||
S->t[0] += S->buflen;
|
||||
S->t[1] += ( S->t[0] < S->buflen );
|
||||
if ( S->last_node )
|
||||
S->f[1] = ~0U;
|
||||
S->f[0] = ~0U;
|
||||
|
||||
memset_zero_256( buf + ( S->buflen>>2 ),
|
||||
( BLAKE2S_BLOCKBYTES - S->buflen ) >> 2 );
|
||||
blake2s_8way_compress( S, buf );
|
||||
|
||||
for ( int i = 0; i < 8; ++i )
|
||||
casti_m256i( out, i ) = S->h[ i ];
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#endif // __AVX2__
|
||||
|
||||
#if 0
|
||||
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
|
||||
{
|
||||
blake2s_state S[1];
|
||||
|
||||
/* Verify parameters */
|
||||
if ( NULL == in ) return -1;
|
||||
|
||||
if ( NULL == out ) return -1;
|
||||
|
||||
if ( NULL == key ) keylen = 0; /* Fail here instead if keylen != 0 and key == NULL? */
|
||||
|
||||
if( keylen > 0 )
|
||||
{
|
||||
if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( blake2s_init( S, outlen ) < 0 ) return -1;
|
||||
}
|
||||
|
||||
blake2s_update( S, ( uint8_t * )in, inlen );
|
||||
blake2s_final( S, out, outlen );
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __AVX__
|
112
algo/blake/blake2s-hash-4way.h
Normal file
112
algo/blake/blake2s-hash-4way.h
Normal file
@@ -0,0 +1,112 @@
|
||||
/**
|
||||
* BLAKE2 reference source code package - reference C implementations
|
||||
*
|
||||
* Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
||||
*
|
||||
* To the extent possible under law, the author(s) have dedicated all copyright
|
||||
* and related and neighboring rights to this software to the public domain
|
||||
* worldwide. This software is distributed without any warranty.
|
||||
*
|
||||
* You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
* this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
*/
|
||||
//#pragma once
|
||||
#ifndef __BLAKE2S_HASH_4WAY_H__
|
||||
#define __BLAKE2S_HASH_4WAY_H__ 1
|
||||
|
||||
#if defined(__AVX__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <inttypes.h>
|
||||
#define inline __inline
|
||||
#define ALIGN(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGN(x) __attribute__((aligned(x)))
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum blake2s_constant
|
||||
{
|
||||
BLAKE2S_BLOCKBYTES = 64,
|
||||
BLAKE2S_OUTBYTES = 32,
|
||||
BLAKE2S_KEYBYTES = 32,
|
||||
BLAKE2S_SALTBYTES = 8,
|
||||
BLAKE2S_PERSONALBYTES = 8
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
typedef struct __blake2s_nway_param
|
||||
{
|
||||
uint8_t digest_length; // 1
|
||||
uint8_t key_length; // 2
|
||||
uint8_t fanout; // 3
|
||||
uint8_t depth; // 4
|
||||
uint32_t leaf_length; // 8
|
||||
uint8_t node_offset[6];// 14
|
||||
uint8_t node_depth; // 15
|
||||
uint8_t inner_length; // 16
|
||||
// uint8_t reserved[0];
|
||||
uint8_t salt[BLAKE2S_SALTBYTES]; // 24
|
||||
uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32
|
||||
} blake2s_nway_param;
|
||||
#pragma pack(pop)
|
||||
|
||||
ALIGN( 64 ) typedef struct __blake2s_4way_state
|
||||
{
|
||||
__m128i h[8];
|
||||
uint8_t buf[ BLAKE2S_BLOCKBYTES * 4 ];
|
||||
uint32_t t[2];
|
||||
uint32_t f[2];
|
||||
size_t buflen;
|
||||
uint8_t last_node;
|
||||
} blake2s_4way_state ;
|
||||
|
||||
int blake2s_4way_init( blake2s_4way_state *S, const uint8_t outlen );
|
||||
int blake2s_4way_update( blake2s_4way_state *S, const void *in,
|
||||
uint64_t inlen );
|
||||
int blake2s_4way_final( blake2s_4way_state *S, void *out, uint8_t outlen );
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
ALIGN( 64 ) typedef struct __blake2s_8way_state
|
||||
{
|
||||
__m256i h[8];
|
||||
uint8_t buf[ BLAKE2S_BLOCKBYTES * 8 ];
|
||||
uint32_t t[2];
|
||||
uint32_t f[2];
|
||||
size_t buflen;
|
||||
uint8_t last_node;
|
||||
} blake2s_8way_state ;
|
||||
|
||||
int blake2s_8way_init( blake2s_8way_state *S, const uint8_t outlen );
|
||||
int blake2s_8way_update( blake2s_8way_state *S, const void *in,
|
||||
uint64_t inlen );
|
||||
int blake2s_8way_final( blake2s_8way_state *S, void *out, uint8_t outlen );
|
||||
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
// Simple API
|
||||
// int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
|
||||
|
||||
// Direct Hash Mining Helpers
|
||||
#define blake2s_salt32(out, in, inlen, key32) blake2s(out, in, key32, 32, inlen, 32) /* neoscrypt */
|
||||
#define blake2s_simple(out, in, inlen) blake2s(out, in, NULL, 32, inlen, 0)
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __AVX__
|
||||
|
||||
#endif
|
@@ -1,27 +1,29 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "blake2s-gate.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "crypto/blake2s.h"
|
||||
#include "sph-blake2s.h"
|
||||
|
||||
static __thread blake2s_state s_midstate;
|
||||
static __thread blake2s_state s_ctx;
|
||||
static __thread blake2s_state blake2s_ctx;
|
||||
//static __thread blake2s_state s_ctx;
|
||||
#define MIDLEN 76
|
||||
|
||||
void blake2s_hash(void *output, const void *input)
|
||||
void blake2s_hash( void *output, const void *input )
|
||||
{
|
||||
unsigned char _ALIGN(64) hash[BLAKE2S_OUTBYTES];
|
||||
blake2s_state blake2_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
blake2s_init(&blake2_ctx, BLAKE2S_OUTBYTES);
|
||||
blake2s_update(&blake2_ctx, input, 80);
|
||||
blake2s_final(&blake2_ctx, hash, BLAKE2S_OUTBYTES);
|
||||
unsigned char _ALIGN(64) hash[BLAKE2S_OUTBYTES];
|
||||
blake2s_state ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &blake2s_ctx, sizeof ctx );
|
||||
blake2s_update( &ctx, input+64, 16 );
|
||||
|
||||
// blake2s_init(&ctx, BLAKE2S_OUTBYTES);
|
||||
// blake2s_update(&ctx, input, 80);
|
||||
blake2s_final( &ctx, hash, BLAKE2S_OUTBYTES );
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
/*
|
||||
static void blake2s_hash_end(uint32_t *output, const uint32_t *input)
|
||||
{
|
||||
s_ctx.buflen = MIDLEN;
|
||||
@@ -29,7 +31,7 @@ static void blake2s_hash_end(uint32_t *output, const uint32_t *input)
|
||||
blake2s_update(&s_ctx, (uint8_t*) &input[MIDLEN/4], 80 - MIDLEN);
|
||||
blake2s_final(&s_ctx, (uint8_t*) output, BLAKE2S_OUTBYTES);
|
||||
}
|
||||
|
||||
*/
|
||||
int scanhash_blake2s(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
@@ -47,13 +49,12 @@ int scanhash_blake2s(int thr_id, struct work *work,
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
// midstate
|
||||
blake2s_init(&s_midstate, BLAKE2S_OUTBYTES);
|
||||
blake2s_update(&s_midstate, (uint8_t*) endiandata, MIDLEN);
|
||||
memcpy(&s_ctx, &s_midstate, sizeof(blake2s_state));
|
||||
blake2s_init( &blake2s_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_update( &blake2s_ctx, (uint8_t*) endiandata, 64 );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
blake2s_hash_end(hash64, endiandata);
|
||||
blake2s_hash( hash64, endiandata );
|
||||
if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
@@ -68,7 +69,7 @@ int scanhash_blake2s(int thr_id, struct work *work,
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blake2s_get_max64 ()
|
||||
{
|
||||
@@ -82,4 +83,4 @@ bool register_blake2s_algo( algo_gate_t* gate )
|
||||
gate->get_max64 = (void*)&blake2s_get_max64;
|
||||
return true;
|
||||
};
|
||||
|
||||
*/
|
||||
|
139
algo/blake/blakecoin-4way.c
Normal file
139
algo/blake/blakecoin-4way.c
Normal file
@@ -0,0 +1,139 @@
|
||||
#include "blakecoin-gate.h"
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
|
||||
#if defined (BLAKECOIN_4WAY)
|
||||
|
||||
blake256r8_4way_context blakecoin_4w_ctx;
|
||||
|
||||
void blakecoin_4way_hash(void *state, const void *input)
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256r8_4way_context ctx;
|
||||
|
||||
memcpy( &ctx, &blakecoin_4w_ctx, sizeof ctx );
|
||||
blake256r8_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r8_4way_close( &ctx, vhash );
|
||||
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
if ( opt_benchmark )
|
||||
HTarget = 0x7f;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
blake256r8_4way_init( &blakecoin_4w_ctx );
|
||||
blake256r8_4way( &blakecoin_4w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
pdata[19] = n;
|
||||
blakecoin_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(BLAKECOIN_8WAY)
|
||||
|
||||
blake256r8_8way_context blakecoin_8w_ctx;
|
||||
|
||||
void blakecoin_8way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*8] __attribute__ ((aligned (64)));
|
||||
blake256r8_8way_context ctx;
|
||||
|
||||
memcpy( &ctx, &blakecoin_8w_ctx, sizeof ctx );
|
||||
blake256r8_8way( &ctx, input + (64<<3), 16 );
|
||||
blake256r8_8way_close( &ctx, vhash );
|
||||
|
||||
mm256_deinterleave_8x32( state, state+ 32, state+ 64, state+ 96,
|
||||
state+128, state+160, state+192, state+224,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
int num_found = 0;
|
||||
if ( opt_benchmark )
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
blake256r8_8way_init( &blakecoin_8w_ctx );
|
||||
blake256r8_8way( &blakecoin_8w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
pdata[19] = n;
|
||||
blakecoin_8way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
36
algo/blake/blakecoin-gate.c
Normal file
36
algo/blake/blakecoin-gate.c
Normal file
@@ -0,0 +1,36 @@
|
||||
#include "blakecoin-gate.h"
|
||||
#include <memory.h>
|
||||
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blakecoin_get_max64 ()
|
||||
{
|
||||
return 0x7ffffLL;
|
||||
// return 0x3fffffLL;
|
||||
}
|
||||
|
||||
// vanilla uses default gen merkle root, otherwise identical to blakecoin
|
||||
bool register_vanilla_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(BLAKECOIN_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_blakecoin_8way;
|
||||
gate->hash = (void*)&blakecoin_8way_hash;
|
||||
|
||||
#elif defined(BLAKECOIN_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_blakecoin_4way;
|
||||
gate->hash = (void*)&blakecoin_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_blakecoin;
|
||||
gate->hash = (void*)&blakecoinhash;
|
||||
#endif
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&blakecoin_get_max64;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool register_blakecoin_algo( algo_gate_t* gate )
|
||||
{
|
||||
register_vanilla_algo( gate );
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
return true;
|
||||
}
|
||||
|
30
algo/blake/blakecoin-gate.h
Normal file
30
algo/blake/blakecoin-gate.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef __BLAKECOIN_GATE_H__
|
||||
#define __BLAKECOIN_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX__)
|
||||
#define BLAKECOIN_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
#define BLAKECOIN_8WAY
|
||||
#endif
|
||||
|
||||
#if defined (BLAKECOIN_8WAY)
|
||||
void blakecoin_8way_hash(void *state, const void *input);
|
||||
int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
#if defined (BLAKECOIN_4WAY)
|
||||
void blakecoin_4way_hash(void *state, const void *input);
|
||||
int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
void blakecoinhash( void *state, const void *input );
|
||||
int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
@@ -1,5 +1,4 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "blakecoin-gate.h"
|
||||
#define BLAKE32_ROUNDS 8
|
||||
#include "sph_blake.h"
|
||||
|
||||
@@ -99,7 +98,7 @@ void blakecoin_gen_merkle_root ( char* merkle_root, struct stratum_ctx* sctx )
|
||||
SHA256( sctx->job.coinbase, (int)sctx->job.coinbase_size, merkle_root );
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
|
||||
int64_t blakecoin_get_max64 ()
|
||||
{
|
||||
@@ -122,4 +121,4 @@ bool register_blakecoin_algo( algo_gate_t* gate )
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
return true;
|
||||
}
|
||||
|
||||
*/
|
||||
|
75
algo/blake/decred-4way.c
Normal file
75
algo/blake/decred-4way.c
Normal file
@@ -0,0 +1,75 @@
|
||||
#include "decred-gate.h"
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#if defined (DECRED_4WAY)
|
||||
|
||||
static __thread blake256_4way_context blake_mid;
|
||||
|
||||
void decred_hash_4way( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
// uint32_t hash0[8] __attribute__ ((aligned (32)));
|
||||
// uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
// uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
// uint32_t hash3[8] __attribute__ ((aligned (32)));
|
||||
const void *tail = input + ( DECRED_MIDSTATE_LEN << 2 );
|
||||
int tail_len = 180 - DECRED_MIDSTATE_LEN;
|
||||
blake256_4way_context ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &blake_mid, sizeof(blake_mid) );
|
||||
blake256_4way( &ctx, tail, tail_len );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t vdata[48*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t _ALIGN(64) edata[48];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
|
||||
// copy to buffer guaranteed to be aligned.
|
||||
memcpy( edata, pdata, 180 );
|
||||
|
||||
// use the old way until new way updated for size.
|
||||
mm_interleave_4x32x( vdata, edata, edata, edata, edata, 180*8 );
|
||||
|
||||
blake256_4way_init( &blake_mid );
|
||||
blake256_4way( &blake_mid, vdata, DECRED_MIDSTATE_LEN );
|
||||
|
||||
uint32_t *noncep = vdata + DECRED_NONCE_INDEX * 4;
|
||||
do {
|
||||
* noncep = n;
|
||||
*(noncep+1) = n+1;
|
||||
*(noncep+2) = n+2;
|
||||
*(noncep+3) = n+3;
|
||||
|
||||
decred_hash_4way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
172
algo/blake/decred-gate.c
Normal file
172
algo/blake/decred-gate.c
Normal file
@@ -0,0 +1,172 @@
|
||||
#include "decred-gate.h"
|
||||
#include <unistd.h>
|
||||
#include <memory.h>
|
||||
#include <string.h>
|
||||
|
||||
uint32_t *decred_get_nonceptr( uint32_t *work_data )
|
||||
{
|
||||
return &work_data[ DECRED_NONCE_INDEX ];
|
||||
}
|
||||
|
||||
double decred_calc_network_diff( struct work* work )
|
||||
{
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
uint32_t nbits = work->data[ DECRED_NBITS_INDEX ];
|
||||
uint32_t bits = ( nbits & 0xffffff );
|
||||
int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
|
||||
int m;
|
||||
double d = (double)0x0000ffff / (double)bits;
|
||||
|
||||
for ( m = shift; m < 29; m++ )
|
||||
d *= 256.0;
|
||||
for ( m = 29; m < shift; m++ )
|
||||
d /= 256.0;
|
||||
if ( shift == 28 )
|
||||
d *= 256.0; // testnet
|
||||
if ( opt_debug_diff )
|
||||
applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d,
|
||||
shift, bits );
|
||||
return net_diff;
|
||||
}
|
||||
|
||||
void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
|
||||
{
|
||||
// some random extradata to make the work unique
|
||||
work->data[ DECRED_XNONCE_INDEX ] = (rand()*4);
|
||||
work->height = work->data[32];
|
||||
if (!have_longpoll && work->height > *net_blocks + 1)
|
||||
{
|
||||
char netinfo[64] = { 0 };
|
||||
if (opt_showdiff && net_diff > 0.)
|
||||
{
|
||||
if (net_diff != work->targetdiff)
|
||||
sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
|
||||
work->targetdiff);
|
||||
else
|
||||
sprintf(netinfo, ", diff %.3f", net_diff);
|
||||
}
|
||||
applog(LOG_BLUE, "%s block %d%s", algo_names[opt_algo], work->height,
|
||||
netinfo);
|
||||
*net_blocks = work->height - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void decred_be_build_stratum_request( char *req, struct work *work,
|
||||
struct stratum_ctx *sctx )
|
||||
{
|
||||
unsigned char *xnonce2str;
|
||||
uint32_t ntime, nonce;
|
||||
char ntimestr[9], noncestr[9];
|
||||
|
||||
be32enc( &ntime, work->data[ DECRED_NTIME_INDEX ] );
|
||||
be32enc( &nonce, work->data[ DECRED_NONCE_INDEX ] );
|
||||
bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) );
|
||||
bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) );
|
||||
xnonce2str = abin2hex( (char*)( &work->data[ DECRED_XNONCE_INDEX ] ),
|
||||
sctx->xnonce1_size );
|
||||
snprintf( req, JSON_BUF_LEN,
|
||||
"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free(xnonce2str);
|
||||
}
|
||||
#define min(a,b) (a>b ? (b) :(a))
|
||||
|
||||
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
uchar merkle_root[64] = { 0 };
|
||||
uint32_t extraheader[32] = { 0 };
|
||||
int headersize = 0;
|
||||
uint32_t* extradata = (uint32_t*) sctx->xnonce1;
|
||||
size_t t;
|
||||
int i;
|
||||
|
||||
// getwork over stratum, getwork merkle + header passed in coinb1
|
||||
memcpy(merkle_root, sctx->job.coinbase, 32);
|
||||
headersize = min((int)sctx->job.coinbase_size - 32,
|
||||
sizeof(extraheader) );
|
||||
memcpy( extraheader, &sctx->job.coinbase[32], headersize );
|
||||
|
||||
// Increment extranonce2
|
||||
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
|
||||
// Assemble block header
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = le32dec( sctx->job.version );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[1 + i] = swab32(
|
||||
le32dec( (uint32_t *) sctx->job.prevhash + i ) );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[9 + i] = swab32( be32dec( (uint32_t *) merkle_root + i ) );
|
||||
|
||||
// for ( i = 0; i < 8; i++ ) // prevhash
|
||||
// g_work->data[1 + i] = swab32( g_work->data[1 + i] );
|
||||
// for ( i = 0; i < 8; i++ ) // merkle
|
||||
// g_work->data[9 + i] = swab32( g_work->data[9 + i] );
|
||||
|
||||
for ( i = 0; i < headersize/4; i++ ) // header
|
||||
g_work->data[17 + i] = extraheader[i];
|
||||
// extradata
|
||||
|
||||
for ( i = 0; i < sctx->xnonce1_size/4; i++ )
|
||||
g_work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
|
||||
for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
|
||||
g_work->data[i] = 0;
|
||||
g_work->data[37] = (rand()*4) << 8;
|
||||
// block header suffix from coinb2 (stake version)
|
||||
memcpy( &g_work->data[44],
|
||||
&sctx->job.coinbase[ sctx->job.coinbase_size-4 ], 4 );
|
||||
sctx->bloc_height = g_work->data[32];
|
||||
//applog_hex(work->data, 180);
|
||||
//applog_hex(&work->data[36], 36);
|
||||
}
|
||||
|
||||
#undef min
|
||||
|
||||
bool decred_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id )
|
||||
{
|
||||
if ( have_stratum && strcmp(stratum->job.job_id, work->job_id) )
|
||||
// need to regen g_work..
|
||||
return false;
|
||||
if ( have_stratum && !work->data[0] && !opt_benchmark )
|
||||
{
|
||||
sleep(1);
|
||||
return false;
|
||||
}
|
||||
// extradata: prevent duplicates
|
||||
work->data[ DECRED_XNONCE_INDEX ] += 1;
|
||||
work->data[ DECRED_XNONCE_INDEX + 1 ] |= thr_id;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool register_decred_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(DECRED_4WAY)
|
||||
four_way_not_tested();
|
||||
gate->scanhash = (void*)&scanhash_decred_4way;
|
||||
gate->hash = (void*)&decred_hash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_decred;
|
||||
gate->hash = (void*)&decred_hash;
|
||||
#endif
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
|
||||
gate->display_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
gate->build_extraheader = (void*)&decred_build_extraheader;
|
||||
gate->ready_to_mine = (void*)&decred_ready_to_mine;
|
||||
gate->nbits_index = DECRED_NBITS_INDEX;
|
||||
gate->ntime_index = DECRED_NTIME_INDEX;
|
||||
gate->nonce_index = DECRED_NONCE_INDEX;
|
||||
gate->work_data_size = DECRED_DATA_SIZE;
|
||||
gate->work_cmp_size = DECRED_WORK_COMPARE_SIZE;
|
||||
allow_mininginfo = false;
|
||||
have_gbt = false;
|
||||
return true;
|
||||
}
|
||||
|
36
algo/blake/decred-gate.h
Normal file
36
algo/blake/decred-gate.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef __DECRED_GATE_H__
|
||||
#define __DECRED_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#define DECRED_NBITS_INDEX 29
|
||||
#define DECRED_NTIME_INDEX 34
|
||||
#define DECRED_NONCE_INDEX 35
|
||||
#define DECRED_XNONCE_INDEX 36
|
||||
#define DECRED_DATA_SIZE 192
|
||||
#define DECRED_WORK_COMPARE_SIZE 140
|
||||
#define DECRED_MIDSTATE_LEN 128
|
||||
|
||||
#if defined (__AVX2__)
|
||||
//void blakehash_84way(void *state, const void *input);
|
||||
//int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
// uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define DECRED_4WAY
|
||||
#endif
|
||||
|
||||
#if defined (DECRED_4WAY)
|
||||
void decred_hash_4way(void *state, const void *input);
|
||||
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
void decred_hash( void *state, const void *input );
|
||||
int scanhash_decred( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
||||
|
@@ -1,5 +1,4 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "decred-gate.h"
|
||||
#include "sph_blake.h"
|
||||
|
||||
#include <string.h>
|
||||
@@ -15,33 +14,33 @@
|
||||
#define max(a,b) (a<b ? b : a)
|
||||
#endif
|
||||
*/
|
||||
|
||||
/*
|
||||
#define DECRED_NBITS_INDEX 29
|
||||
#define DECRED_NTIME_INDEX 34
|
||||
#define DECRED_NONCE_INDEX 35
|
||||
#define DECRED_XNONCE_INDEX 36
|
||||
#define DECRED_DATA_SIZE 192
|
||||
#define DECRED_WORK_COMPARE_SIZE 140
|
||||
|
||||
*/
|
||||
static __thread sph_blake256_context blake_mid;
|
||||
static __thread bool ctx_midstate_done = false;
|
||||
|
||||
void decred_hash(void *state, const void *input)
|
||||
{
|
||||
#define MIDSTATE_LEN 128
|
||||
// #define MIDSTATE_LEN 128
|
||||
sph_blake256_context ctx __attribute__ ((aligned (64)));
|
||||
|
||||
uint8_t *ending = (uint8_t*) input;
|
||||
ending += MIDSTATE_LEN;
|
||||
ending += DECRED_MIDSTATE_LEN;
|
||||
|
||||
if (!ctx_midstate_done) {
|
||||
sph_blake256_init(&blake_mid);
|
||||
sph_blake256(&blake_mid, input, MIDSTATE_LEN);
|
||||
sph_blake256(&blake_mid, input, DECRED_MIDSTATE_LEN);
|
||||
ctx_midstate_done = true;
|
||||
}
|
||||
memcpy(&ctx, &blake_mid, sizeof(blake_mid));
|
||||
|
||||
sph_blake256(&ctx, ending, (180 - MIDSTATE_LEN));
|
||||
sph_blake256(&ctx, ending, (180 - DECRED_MIDSTATE_LEN));
|
||||
sph_blake256_close(&ctx, state);
|
||||
}
|
||||
|
||||
@@ -60,9 +59,9 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
#define DCR_NONCE_OFT32 35
|
||||
// #define DCR_NONCE_OFT32 35
|
||||
|
||||
const uint32_t first_nonce = pdata[DCR_NONCE_OFT32];
|
||||
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
|
||||
uint32_t n = first_nonce;
|
||||
@@ -82,7 +81,7 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
|
||||
|
||||
do {
|
||||
//be32enc(&endiandata[DCR_NONCE_OFT32], n);
|
||||
endiandata[DCR_NONCE_OFT32] = n;
|
||||
endiandata[DECRED_NONCE_INDEX] = n;
|
||||
decred_hash(hash32, endiandata);
|
||||
|
||||
if (hash32[7] <= HTarget && fulltest(hash32, ptarget)) {
|
||||
@@ -93,7 +92,7 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
|
||||
applog_hash(ptarget);
|
||||
applog_compare_hash(hash32, ptarget);
|
||||
#endif
|
||||
pdata[DCR_NONCE_OFT32] = n;
|
||||
pdata[DECRED_NONCE_INDEX] = n;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -102,24 +101,17 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[DCR_NONCE_OFT32] = n;
|
||||
pdata[DECRED_NONCE_INDEX] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
uint32_t *decred_get_nonceptr( uint32_t *work_data )
|
||||
{
|
||||
return &work_data[ DECRED_NONCE_INDEX ];
|
||||
}
|
||||
|
||||
// does decred need a custom stratum_get_g_work to fix nicehash
|
||||
// bad extranonce2 size?
|
||||
//
|
||||
// does decred need a custom init_nonce?
|
||||
// does it need to increment nonce, seems not because gen_work_now always
|
||||
// returns true
|
||||
|
||||
double decred_calc_network_diff( struct work* work )
|
||||
//void decred_calc_network_diff( struct work* work )
|
||||
{
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
@@ -181,7 +173,7 @@ void decred_be_build_stratum_request( char *req, struct work *work,
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free(xnonce2str);
|
||||
}
|
||||
|
||||
*/
|
||||
/*
|
||||
// data shared between gen_merkle_root and build_extraheader.
|
||||
__thread uint32_t decred_extraheader[32] = { 0 };
|
||||
@@ -197,7 +189,7 @@ void decred_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
#define min(a,b) (a>b ? (b) :(a))
|
||||
|
||||
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
@@ -235,11 +227,15 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
for ( i = 0; i < headersize/4; i++ ) // header
|
||||
g_work->data[17 + i] = extraheader[i];
|
||||
// extradata
|
||||
|
||||
for ( i = 0; i < sctx->xnonce1_size/4; i++ )
|
||||
g_work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
|
||||
for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
|
||||
g_work->data[i] = 0;
|
||||
g_work->data[37] = (rand()*4) << 8;
|
||||
// block header suffix from coinb2 (stake version)
|
||||
memcpy( &g_work->data[44],
|
||||
&sctx->job.coinbase[ sctx->job.coinbase_size-4 ], 4 );
|
||||
sctx->bloc_height = g_work->data[32];
|
||||
//applog_hex(work->data, 180);
|
||||
//applog_hex(&work->data[36], 36);
|
||||
@@ -274,6 +270,8 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
|
||||
gate->display_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
gate->build_extraheader = (void*)&decred_build_extraheader;
|
||||
gate->ready_to_mine = (void*)&decred_ready_to_mine;
|
||||
gate->nbits_index = DECRED_NBITS_INDEX;
|
||||
@@ -285,4 +283,4 @@ bool register_decred_algo( algo_gate_t* gate )
|
||||
have_gbt = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
*/
|
||||
|
175
algo/blake/pentablake-4way.c
Normal file
175
algo/blake/pentablake-4way.c
Normal file
@@ -0,0 +1,175 @@
|
||||
#include "pentablake-gate.h"
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "blake-hash-4way.h"
|
||||
#include "sph_blake.h"
|
||||
|
||||
//#define DEBUG_ALGO
|
||||
|
||||
extern void pentablakehash_4way( void *output, const void *input )
|
||||
{
|
||||
unsigned char _ALIGN(32) hash[128];
|
||||
// // same as uint32_t hashA[16], hashB[16];
|
||||
// #define hashB hash+64
|
||||
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake512_4way_context ctx;
|
||||
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, input, 80 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
uint64_t sin0[10], sin1[10], sin2[10], sin3[10];
|
||||
mm256_deinterleave_4x64( sin0, sin1, sin2, sin3, input, 640 );
|
||||
sph_blake512_context ctx2_blake;
|
||||
sph_blake512_init(&ctx2_blake);
|
||||
sph_blake512(&ctx2_blake, sin0, 80);
|
||||
sph_blake512_close(&ctx2_blake, (void*) hash);
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
uint64_t* hash64 = (uint64_t*)hash;
|
||||
for( int i = 0; i < 8; i++ )
|
||||
{
|
||||
if ( hash0[i] != hash64[i] )
|
||||
printf("hash mismatch %u\n",i);
|
||||
}
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
|
||||
/*
|
||||
uint64_t sin0[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin1[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin2[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin3[10] __attribute__ ((aligned (64)));
|
||||
|
||||
sph_blake512_context ctx_blake;
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, input, 80);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
*/
|
||||
}
|
||||
|
||||
int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[32] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
// uint32_t _ALIGN(32) hash64[8];
|
||||
// uint32_t _ALIGN(32) endiandata[32];
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
pentablakehash_4way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( !( (hash+(i<<3))[7] & mask )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
16
algo/blake/pentablake-gate.c
Normal file
16
algo/blake/pentablake-gate.c
Normal file
@@ -0,0 +1,16 @@
|
||||
#include "pentablake-gate.h"
|
||||
|
||||
bool register_pentablake_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (PENTABLAKE_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_pentablake_4way;
|
||||
gate->hash = (void*)&pentablakehash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_pentablake;
|
||||
gate->hash = (void*)&pentablakehash;
|
||||
#endif
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
21
algo/blake/pentablake-gate.h
Normal file
21
algo/blake/pentablake-gate.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef __PENTABLAKE_GATE_H__
|
||||
#define __PENTABLAKE_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define PENTABLAKE_4WAY
|
||||
#endif
|
||||
|
||||
#if defined(PENTABLAKE_4WAY)
|
||||
void pentablakehash_4way( void *state, const void *input );
|
||||
int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
void pentablakehash( void *state, const void *input );
|
||||
int scanhash_pentablake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
@@ -1,5 +1,4 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "pentablake-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -111,11 +110,3 @@ int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool register_pentablake_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_pentablake;
|
||||
gate->hash = (void*)&pentablakehash;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -16,7 +16,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "crypto/blake2s.h"
|
||||
#include "sph-blake2s.h"
|
||||
|
||||
static const uint32_t blake2s_IV[8] =
|
||||
{
|
@@ -813,6 +813,7 @@ blake32(sph_blake_small_context *sc, const void *data, size_t len)
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
|
||||
if (len < (sizeof sc->buf) - ptr) {
|
||||
memcpy(buf + ptr, data, len);
|
||||
ptr += len;
|
||||
@@ -871,6 +872,7 @@ blake32_close(sph_blake_small_context *sc,
|
||||
} else {
|
||||
sc->T0 -= 512 - bit_len;
|
||||
}
|
||||
|
||||
if (bit_len <= 446) {
|
||||
memset(u.buf + ptr + 1, 0, 55 - ptr);
|
||||
if (out_size_w32 == 8)
|
||||
@@ -890,9 +892,9 @@ blake32_close(sph_blake_small_context *sc,
|
||||
sph_enc32be_aligned(u.buf + 60, tl);
|
||||
blake32(sc, u.buf, 64);
|
||||
}
|
||||
out = dst;
|
||||
for (k = 0; k < out_size_w32; k ++)
|
||||
sph_enc32be(out + (k << 2), sc->H[k]);
|
||||
out = dst;
|
||||
for (k = 0; k < out_size_w32; k ++)
|
||||
sph_enc32be(out + (k << 2), sc->H[k]);
|
||||
}
|
||||
|
||||
#if SPH_64
|
||||
@@ -982,9 +984,11 @@ blake64_close(sph_blake_big_context *sc,
|
||||
u.buf[111] |= 1;
|
||||
sph_enc64be_aligned(u.buf + 112, th);
|
||||
sph_enc64be_aligned(u.buf + 120, tl);
|
||||
|
||||
blake64(sc, u.buf + ptr, 128 - ptr);
|
||||
} else {
|
||||
memset(u.buf + ptr + 1, 0, 127 - ptr);
|
||||
|
||||
blake64(sc, u.buf + ptr, 128 - ptr);
|
||||
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
|
||||
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
|
||||
@@ -993,6 +997,7 @@ blake64_close(sph_blake_big_context *sc,
|
||||
u.buf[111] = 1;
|
||||
sph_enc64be_aligned(u.buf + 112, th);
|
||||
sph_enc64be_aligned(u.buf + 120, tl);
|
||||
|
||||
blake64(sc, u.buf, 128);
|
||||
}
|
||||
out = dst;
|
||||
|
1168
algo/bmw/bmw-hash-4way.c
Normal file
1168
algo/bmw/bmw-hash-4way.c
Normal file
File diff suppressed because it is too large
Load Diff
95
algo/bmw/bmw-hash-4way.h
Normal file
95
algo/bmw/bmw-hash-4way.h
Normal file
@@ -0,0 +1,95 @@
|
||||
/* $Id: sph_bmw.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* BMW interface. BMW (aka "Blue Midnight Wish") is a family of
|
||||
* functions which differ by their output size; this implementation
|
||||
* defines BMW for output sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_bmw.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef BMW_HASH_H__
|
||||
#define BMW_HASH_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
#define SPH_SIZE_bmw256 256
|
||||
|
||||
#define SPH_SIZE_bmw512 512
|
||||
|
||||
typedef struct {
|
||||
__m128i buf[64];
|
||||
__m128i H[16];
|
||||
size_t ptr;
|
||||
sph_u32 bit_count; // assume bit_count fits in 32 bits
|
||||
} bmw_4way_small_context;
|
||||
|
||||
typedef bmw_4way_small_context bmw256_4way_context;
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[16];
|
||||
__m256i H[16];
|
||||
size_t ptr;
|
||||
sph_u64 bit_count;
|
||||
} bmw_4way_big_context;
|
||||
|
||||
typedef bmw_4way_big_context bmw512_4way_context;
|
||||
|
||||
void bmw256_4way_init(void *cc);
|
||||
|
||||
void bmw256_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void bmw256_4way_close(void *cc, void *dst);
|
||||
|
||||
void bmw256_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void bmw512_4way_init(void *cc);
|
||||
|
||||
void bmw512_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void bmw512_4way_close(void *cc, void *dst);
|
||||
|
||||
void bmw512_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
1251
algo/bmw/bmw.test
Normal file
1251
algo/bmw/bmw.test
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,3 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <string.h>
|
||||
|
@@ -477,7 +477,7 @@ do { \
|
||||
for (u = 0; u < 16; u ++) \
|
||||
sph_enc64le_aligned(data + 8 * u, h2[u]); \
|
||||
dh = h1; \
|
||||
h = final_b; \
|
||||
h = (sph_u64*)final_b; \
|
||||
} \
|
||||
/* end wrapped for break loop */ \
|
||||
out = dst; \
|
||||
|
@@ -2,7 +2,6 @@
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#if defined(__arm__) || defined(_MSC_VER)
|
||||
|
@@ -3,7 +3,8 @@
|
||||
#include "cryptonight.h"
|
||||
#include "miner.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "avxdefs.h"
|
||||
#include <immintrin.h>
|
||||
//#include "avxdefs.h"
|
||||
|
||||
void aesni_parallel_noxor(uint8_t *long_state, uint8_t *text, uint8_t *ExpandedKey);
|
||||
void aesni_parallel_xor(uint8_t *text, uint8_t *ExpandedKey, uint8_t *long_state);
|
||||
@@ -109,43 +110,43 @@ static __thread cryptonight_ctx ctx;
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
|
||||
uint8_t ExpandedKey[256] __attribute__((aligned(64)));
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
size_t i, j;
|
||||
|
||||
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
|
||||
memcpy(ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE);
|
||||
ExpandAESKey256(ExpandedKey);
|
||||
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
|
||||
memcpy( ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||
|
||||
__m128i *longoutput, *expkey, *xmminput;
|
||||
longoutput = (__m128i *)ctx.long_state;
|
||||
expkey = (__m128i *)ExpandedKey;
|
||||
xmminput = (__m128i *)ctx.text;
|
||||
longoutput = (__m128i*)ctx.long_state;
|
||||
xmminput = (__m128i*)ctx.text;
|
||||
expkey = (__m128i*)ExpandedKey;
|
||||
|
||||
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
|
||||
// aesni_parallel_noxor(&ctx->long_state[i], ctx->text, ExpandedKey);
|
||||
|
||||
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
|
||||
// prefetch expkey, xmminput and enough longoutput for 4 iterations
|
||||
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
||||
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
||||
for ( i = 0; i < 64; i += 16 )
|
||||
{
|
||||
_mm_prefetch( longoutput + i, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 8, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 12, _MM_HINT_T0 );
|
||||
}
|
||||
_mm_prefetch( expkey, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
||||
|
||||
for ( i = 0; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||
for ( i = 0; i < 64; i += 16 )
|
||||
{
|
||||
// prefetch 4 loops ahead,
|
||||
__builtin_prefetch( longoutput + i, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 4, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 8, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 12, 1, 0 );
|
||||
}
|
||||
|
||||
// n-4 iterations
|
||||
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
|
||||
i += INIT_SIZE_M128I )
|
||||
{
|
||||
// prefetch 4 iterations ahead.
|
||||
__builtin_prefetch( longoutput + i + 64, 1, 0 );
|
||||
__builtin_prefetch( longoutput + i + 68, 1, 0 );
|
||||
|
||||
for (j = 0; j < 10; j++ )
|
||||
for ( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
@@ -165,84 +166,99 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
|
||||
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
|
||||
}
|
||||
// last 4 iterations
|
||||
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||
{
|
||||
for ( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
|
||||
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
|
||||
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
|
||||
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
|
||||
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
|
||||
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
|
||||
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
|
||||
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
|
||||
}
|
||||
|
||||
// cast_m128i( ctx.a ) = _mm_xor_si128( casti_m128i( ctx.state.k, 0 ) ,
|
||||
// casti_m128i( ctx.state.k, 2 ) );
|
||||
// cast_m128i( ctx.b ) = _mm_xor_si128( casti_m128i( ctx.state.k, 1 ),
|
||||
// casti_m128i( ctx.state.k, 3 ) );
|
||||
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
|
||||
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
|
||||
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
|
||||
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
|
||||
|
||||
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
|
||||
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
|
||||
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
|
||||
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
|
||||
|
||||
// for (i = 0; i < 2; i++)
|
||||
// {
|
||||
// ctx.a[i] = ((uint64_t *)ctx.state.k)[i] ^ ((uint64_t *)ctx.state.k)[i+4];
|
||||
// ctx.b[i] = ((uint64_t *)ctx.state.k)[i+2] ^ ((uint64_t *)ctx.state.k)[i+6];
|
||||
// }
|
||||
|
||||
__m128i b_x = _mm_load_si128((__m128i *)ctx.b);
|
||||
uint64_t a[2] __attribute((aligned(16))), b[2] __attribute((aligned(16)));
|
||||
uint64_t a[2] __attribute((aligned(16))),
|
||||
b[2] __attribute((aligned(16))),
|
||||
c[2] __attribute((aligned(16)));
|
||||
a[0] = ctx.a[0];
|
||||
a[1] = ctx.a[1];
|
||||
|
||||
for(i = 0; __builtin_expect(i < 0x80000, 1); i++)
|
||||
__m128i b_x = _mm_load_si128( (__m128i*)ctx.b );
|
||||
__m128i a_x = _mm_load_si128( (__m128i*)a );
|
||||
__m128i* lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||
__m128i c_x = _mm_load_si128( lsa );
|
||||
uint64_t *nextblock;
|
||||
uint64_t hi, lo;
|
||||
|
||||
// n-1 iterations
|
||||
for( i = 0; __builtin_expect( i < 0x7ffff, 1 ); i++ )
|
||||
{
|
||||
uint64_t c[2];
|
||||
__builtin_prefetch( &ctx.long_state[c[0] & 0x1FFFF0], 0, 1 );
|
||||
|
||||
__m128i c_x = _mm_load_si128(
|
||||
(__m128i *)&ctx.long_state[a[0] & 0x1FFFF0]);
|
||||
__m128i a_x = _mm_load_si128((__m128i *)a);
|
||||
c_x = _mm_aesenc_si128(c_x, a_x);
|
||||
_mm_store_si128((__m128i *)c, c_x);
|
||||
|
||||
b_x = _mm_xor_si128(b_x, c_x);
|
||||
_mm_store_si128((__m128i *)&ctx.long_state[a[0] & 0x1FFFF0], b_x);
|
||||
|
||||
uint64_t *nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
// uint64_t b[2];
|
||||
c_x = _mm_aesenc_si128( c_x, a_x );
|
||||
_mm_store_si128( (__m128i*)c, c_x );
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
{
|
||||
uint64_t hi, lo;
|
||||
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
|
||||
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
|
||||
__asm__( "mulq %3\n\t"
|
||||
: "=d" ( hi ),
|
||||
"=a" ( lo )
|
||||
: "%a" ( c[0] ),
|
||||
"rm" ( b[0] )
|
||||
: "cc" );
|
||||
|
||||
__asm__("mulq %3\n\t"
|
||||
: "=d" (hi),
|
||||
"=a" (lo)
|
||||
: "%a" (c[0]),
|
||||
"rm" (b[0])
|
||||
: "cc" );
|
||||
|
||||
a[0] += hi;
|
||||
a[1] += lo;
|
||||
}
|
||||
uint64_t *dst = (uint64_t*)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
// __m128i *dst = (__m128i*)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
|
||||
// *dst = cast_m128i( a );
|
||||
dst[0] = a[0];
|
||||
dst[1] = a[1];
|
||||
|
||||
// cast_m128i( a ) = _mm_xor_si128( cast_m128i( a ), cast_m128i( b ) );
|
||||
a[0] ^= b[0];
|
||||
a[1] ^= b[1];
|
||||
b_x = c_x;
|
||||
__builtin_prefetch( &ctx.long_state[a[0] & 0x1FFFF0], 0, 3 );
|
||||
b_x = c_x;
|
||||
nextblock[0] = a[0] + hi;
|
||||
nextblock[1] = a[1] + lo;
|
||||
a[0] = b[0] ^ nextblock[0];
|
||||
a[1] = b[1] ^ nextblock[1];
|
||||
lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
|
||||
a_x = _mm_load_si128( (__m128i*)a );
|
||||
c_x = _mm_load_si128( lsa );
|
||||
}
|
||||
// abreviated nth iteration
|
||||
c_x = _mm_aesenc_si128( c_x, a_x );
|
||||
_mm_store_si128( (__m128i*)c, c_x );
|
||||
b_x = _mm_xor_si128( b_x, c_x );
|
||||
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
|
||||
_mm_store_si128( lsa, b_x );
|
||||
b[0] = nextblock[0];
|
||||
b[1] = nextblock[1];
|
||||
|
||||
__asm__( "mulq %3\n\t"
|
||||
: "=d" ( hi ),
|
||||
"=a" ( lo )
|
||||
: "%a" ( c[0] ),
|
||||
"rm" ( b[0] )
|
||||
: "cc" );
|
||||
|
||||
nextblock[0] = a[0] + hi;
|
||||
nextblock[1] = a[1] + lo;
|
||||
|
||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||
memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE );
|
||||
ExpandAESKey256( ExpandedKey );
|
||||
|
||||
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
|
||||
// aesni_parallel_xor(&ctx->text, ExpandedKey, &ctx->long_state[i]);
|
||||
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
|
||||
|
||||
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
|
||||
|
||||
_mm_prefetch( xmminput, _MM_HINT_T0 );
|
||||
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
|
||||
for ( i = 0; i < 64; i += 16 )
|
||||
@@ -256,9 +272,11 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
|
||||
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
|
||||
|
||||
for ( i = 0; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||
// n-4 iterations
|
||||
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
|
||||
i += INIT_SIZE_M128I )
|
||||
{
|
||||
// stay 4 loops ahead,
|
||||
// stay 4 iterations ahead.
|
||||
_mm_prefetch( longoutput + i + 64, _MM_HINT_T0 );
|
||||
_mm_prefetch( longoutput + i + 68, _MM_HINT_T0 );
|
||||
|
||||
@@ -283,10 +301,34 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
}
|
||||
|
||||
// last 4 iterations
|
||||
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
|
||||
{
|
||||
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
|
||||
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
|
||||
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
|
||||
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
|
||||
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
|
||||
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
|
||||
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
|
||||
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
|
||||
|
||||
for( j = 0; j < 10; j++ )
|
||||
{
|
||||
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
|
||||
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
|
||||
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
|
||||
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
|
||||
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
|
||||
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
|
||||
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
|
||||
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
|
||||
}
|
||||
}
|
||||
|
||||
memcpy( ctx.state.init, ctx.text, INIT_SIZE_BYTE);
|
||||
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
|
||||
|
||||
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
@@ -5,7 +5,6 @@
|
||||
// Modified for CPUminer by Lucas Jones
|
||||
|
||||
#include "cpuminer-config.h"
|
||||
//#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
|
@@ -10,6 +10,10 @@
|
||||
#endif
|
||||
#include "cubehash_sse2.h"
|
||||
#include "algo/sha/sha3-defs.h"
|
||||
#include <stdbool.h>
|
||||
#include <unistd.h>
|
||||
#include <memory.h>
|
||||
#include "avxdefs.h"
|
||||
|
||||
static void transform( cubehashParam *sp )
|
||||
{
|
||||
@@ -125,6 +129,18 @@ static void transform( cubehashParam *sp )
|
||||
#endif
|
||||
} // transform
|
||||
|
||||
// Cubehash context initializing is very expensive.
|
||||
// Cache the intial value for faster reinitializing.
|
||||
cubehashParam cube_ctx_cache __attribute__ ((aligned (64)));
|
||||
|
||||
int cubehashReinit( cubehashParam *sp )
|
||||
{
|
||||
memcpy( sp, &cube_ctx_cache, sizeof(cubehashParam) );
|
||||
return SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
// Initialize the cache then copy to sp.
|
||||
int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes)
|
||||
{
|
||||
int i;
|
||||
@@ -135,24 +151,26 @@ int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes)
|
||||
|
||||
/* Sanity checks */
|
||||
if ( rounds <= 0 || rounds > 32 )
|
||||
rounds = CUBEHASH_ROUNDS;
|
||||
rounds = CUBEHASH_ROUNDS;
|
||||
if ( blockbytes <= 0 || blockbytes >= 256)
|
||||
blockbytes = CUBEHASH_BLOCKBYTES;
|
||||
blockbytes = CUBEHASH_BLOCKBYTES;
|
||||
|
||||
// all sizes of __m128i
|
||||
sp->hashlen = hashbitlen/128;
|
||||
sp->blocksize = blockbytes/16;
|
||||
sp->rounds = rounds;
|
||||
sp->pos = 0;
|
||||
cube_ctx_cache.hashlen = hashbitlen/128;
|
||||
cube_ctx_cache.blocksize = blockbytes/16;
|
||||
cube_ctx_cache.rounds = rounds;
|
||||
cube_ctx_cache.pos = 0;
|
||||
|
||||
for ( i = 0; i < 8; ++i )
|
||||
sp->x[i] = _mm_set_epi32(0, 0, 0, 0);
|
||||
cube_ctx_cache.x[i] = _mm_setzero_si128();;
|
||||
|
||||
sp->x[0] = _mm_set_epi32( 0, rounds, blockbytes, hashbitlen / 8 );
|
||||
cube_ctx_cache.x[0] = _mm_set_epi32( 0, rounds, blockbytes,
|
||||
hashbitlen / 8 );
|
||||
|
||||
for ( i = 0; i < 10; ++i )
|
||||
transform(sp);
|
||||
// sp->pos = 0;
|
||||
transform( &cube_ctx_cache );
|
||||
|
||||
memcpy( sp, &cube_ctx_cache, sizeof(cubehashParam) );
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
|
@@ -29,6 +29,8 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
int cubehashInit(cubehashParam* sp, int hashbitlen, int rounds, int blockbytes);
|
||||
// reinitialize context with same parameters, much faster.
|
||||
int cubehashReinit( cubehashParam* sp );
|
||||
|
||||
int cubehashUpdate(cubehashParam* sp, const byte *data, size_t size);
|
||||
|
||||
|
@@ -1,2 +0,0 @@
|
||||
amd64
|
||||
x86
|
@@ -14,18 +14,20 @@
|
||||
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
|
||||
*
|
||||
*/
|
||||
#if defined(__AES__)
|
||||
|
||||
#include <memory.h>
|
||||
#include "miner.h"
|
||||
#include "hash_api.h"
|
||||
#include "vperm.h"
|
||||
|
||||
//#include "vperm.h"
|
||||
#include <immintrin.h>
|
||||
/*
|
||||
#ifndef NO_AES_NI
|
||||
#include <wmmintrin.h>
|
||||
#else
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
*/
|
||||
|
||||
MYALIGN const unsigned int _k_s0F[] = {0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F};
|
||||
MYALIGN const unsigned int _k_ipt[] = {0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090, 0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC};
|
||||
@@ -246,7 +248,8 @@ void DumpState(__m128i *ps)
|
||||
void Compress(hashState_echo *ctx, const unsigned char *pmsg, unsigned int uBlockCount)
|
||||
{
|
||||
unsigned int r, b, i, j;
|
||||
__m128i t1, t2, t3, t4, s1, s2, s3, k1, ktemp;
|
||||
// __m128i t1, t2, t3, t4, s1, s2, s3, k1, ktemp;
|
||||
__m128i t1, t2, s2, k1;
|
||||
__m128i _state[4][4], _state2[4][4], _statebackup[4][4];
|
||||
|
||||
|
||||
@@ -396,7 +399,7 @@ HashReturn init_echo(hashState_echo *ctx, int nHashSize)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
ctx->k = _mm_xor_si128(ctx->k, ctx->k);
|
||||
ctx->k = _mm_setzero_si128();
|
||||
ctx->processed_bits = 0;
|
||||
ctx->uBufferBytes = 0;
|
||||
|
||||
@@ -742,4 +745,4 @@ HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databit
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -1 +0,0 @@
|
||||
Çağdaş Çalık
|
@@ -1,119 +0,0 @@
|
||||
/*
|
||||
* file : vperm.h
|
||||
* version : 1.0.208
|
||||
* date : 14.12.2010
|
||||
*
|
||||
* vperm implementation of AES s-box
|
||||
*
|
||||
* Credits: Adapted from Mike Hamburg's AES implementation, http://crypto.stanford.edu/vpaes/
|
||||
*
|
||||
* Cagdas Calik
|
||||
* ccalik@metu.edu.tr
|
||||
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef VPERM_H
|
||||
#define VPERM_H
|
||||
|
||||
#include "algo/sha/sha3_common.h"
|
||||
#include <tmmintrin.h>
|
||||
|
||||
/*
|
||||
extern const unsigned int _k_s0F[];
|
||||
extern const unsigned int _k_ipt[];
|
||||
extern const unsigned int _k_opt[];
|
||||
extern const unsigned int _k_inv[];
|
||||
extern const unsigned int _k_sb1[];
|
||||
extern const unsigned int _k_sb2[];
|
||||
extern const unsigned int _k_sb3[];
|
||||
extern const unsigned int _k_sb4[];
|
||||
extern const unsigned int _k_sb5[];
|
||||
extern const unsigned int _k_sb7[];
|
||||
extern const unsigned int _k_sbo[];
|
||||
extern const unsigned int _k_h63[];
|
||||
extern const unsigned int _k_hc6[];
|
||||
extern const unsigned int _k_h5b[];
|
||||
extern const unsigned int _k_h4e[];
|
||||
extern const unsigned int _k_h0e[];
|
||||
extern const unsigned int _k_h15[];
|
||||
extern const unsigned int _k_aesmix1[];
|
||||
extern const unsigned int _k_aesmix2[];
|
||||
extern const unsigned int _k_aesmix3[];
|
||||
extern const unsigned int _k_aesmix4[];
|
||||
*/
|
||||
|
||||
// input: x, table
|
||||
// output: x
|
||||
#define TRANSFORM(x, table, t1, t2)\
|
||||
t1 = _mm_andnot_si128(M128(_k_s0F), x);\
|
||||
t1 = _mm_srli_epi32(t1, 4);\
|
||||
x = _mm_and_si128(x, M128(_k_s0F));\
|
||||
t1 = _mm_shuffle_epi8(*((__m128i*)table + 1), t1);\
|
||||
x = _mm_shuffle_epi8(*((__m128i*)table + 0), x);\
|
||||
x = _mm_xor_si128(x, t1)
|
||||
|
||||
// compiled erroneously with 32-bit msc compiler
|
||||
//t2 = _mm_shuffle_epi8(table[0], x);\
|
||||
//x = _mm_shuffle_epi8(table[1], t1);\
|
||||
//x = _mm_xor_si128(x, t2)
|
||||
|
||||
|
||||
// input: x
|
||||
// output: t2, t3
|
||||
#define SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4)\
|
||||
t1 = _mm_andnot_si128(M128(_k_s0F), x);\
|
||||
t1 = _mm_srli_epi32(t1, 4);\
|
||||
x = _mm_and_si128(x, M128(_k_s0F));\
|
||||
t2 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 1), x);\
|
||||
x = _mm_xor_si128(x, t1);\
|
||||
t3 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t1);\
|
||||
t3 = _mm_xor_si128(t3, t2);\
|
||||
t4 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), x);\
|
||||
t4 = _mm_xor_si128(t4, t2);\
|
||||
t2 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t3);\
|
||||
t2 = _mm_xor_si128(t2, x);\
|
||||
t3 = _mm_shuffle_epi8(*((__m128i*)_k_inv + 0), t4);\
|
||||
t3 = _mm_xor_si128(t3, t1);\
|
||||
|
||||
|
||||
// input: x1, x2, table
|
||||
// output: y
|
||||
#define VPERM_LOOKUP(x1, x2, table, y, t)\
|
||||
t = _mm_shuffle_epi8(*((__m128i*)table + 0), x1);\
|
||||
y = _mm_shuffle_epi8(*((__m128i*)table + 1), x2);\
|
||||
y = _mm_xor_si128(y, t)
|
||||
|
||||
|
||||
// input: x
|
||||
// output: x
|
||||
#define SUBSTITUTE_VPERM(x, t1, t2, t3, t4) \
|
||||
TRANSFORM(x, _k_ipt, t1, t2);\
|
||||
SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4);\
|
||||
VPERM_LOOKUP(t2, t3, _k_sbo, x, t1);\
|
||||
x = _mm_xor_si128(x, M128(_k_h63))
|
||||
|
||||
|
||||
// input: x
|
||||
// output: x
|
||||
#define AES_ROUND_VPERM_CORE(x, t1, t2, t3, t4, s1, s2, s3) \
|
||||
SUBSTITUTE_VPERM_CORE(x, t1, t2, t3, t4);\
|
||||
VPERM_LOOKUP(t2, t3, _k_sb1, s1, t1);\
|
||||
VPERM_LOOKUP(t2, t3, _k_sb2, s2, t1);\
|
||||
s3 = _mm_xor_si128(s1, s2);\
|
||||
x = _mm_shuffle_epi8(s2, M128(_k_aesmix1));\
|
||||
x = _mm_xor_si128(x, _mm_shuffle_epi8(s3, M128(_k_aesmix2)));\
|
||||
x = _mm_xor_si128(x, _mm_shuffle_epi8(s1, M128(_k_aesmix3)));\
|
||||
x = _mm_xor_si128(x, _mm_shuffle_epi8(s1, M128(_k_aesmix4)));\
|
||||
x = _mm_xor_si128(x, M128(_k_h5b))
|
||||
|
||||
|
||||
// input: x
|
||||
// output: x
|
||||
#define AES_ROUND_VPERM(x, t1, t2, t3, t4, s1, s2, s3) \
|
||||
TRANSFORM(x, _k_ipt, t1, t2);\
|
||||
AES_ROUND_VPERM_CORE(x, t1, t2, t3, t4, s1, s2, s3);\
|
||||
TRANSFORM(x, _k_opt, t1, t2)
|
||||
|
||||
#endif // VPERM_H
|
||||
|
@@ -21,7 +21,7 @@
|
||||
|
||||
#include "brg_endian.h"
|
||||
#define NEED_UINT_64T
|
||||
#include "brg_types.h"
|
||||
#include "algo/sha/brg_types.h"
|
||||
|
||||
/* some sizes (number of bytes) */
|
||||
#define ROWS (8)
|
||||
|
@@ -35,7 +35,7 @@ typedef crypto_uint64 u64;
|
||||
|
||||
#include "brg_endian.h"
|
||||
#define NEED_UINT_64T
|
||||
#include "brg_types.h"
|
||||
#include "algo/sha/brg_types.h"
|
||||
|
||||
#ifdef IACA_TRACE
|
||||
#include IACA_MARKS
|
||||
|
@@ -1,4 +1,3 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <stdio.h>
|
||||
@@ -99,22 +98,21 @@ void groestl_set_target( struct work* work, double job_diff )
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_groestl_algo( algo_gate_t* gate )
|
||||
bool register_dmd_gr_algo( algo_gate_t* gate )
|
||||
{
|
||||
init_groestl_ctx();
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
gate->scanhash = (void*)&scanhash_groestl;
|
||||
gate->hash = (void*)&groestlhash;
|
||||
gate->set_target = (void*)&groestl_set_target;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
bool register_dmd_gr_algo( algo_gate_t* gate )
|
||||
bool register_groestl_algo( algo_gate_t* gate )
|
||||
{
|
||||
register_groestl_algo( gate );
|
||||
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
|
||||
register_dmd_gr_algo( gate );
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -1,5 +1,4 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "myrgr-gate.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -11,12 +10,7 @@
|
||||
#else
|
||||
#include "aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
#if defined __SHA__
|
||||
#include <openssl/sha.h>
|
||||
#else
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#endif
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
|
||||
typedef struct {
|
||||
#ifdef NO_AES_NI
|
||||
@@ -24,11 +18,7 @@ typedef struct {
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
#endif
|
||||
#if defined __SHA__
|
||||
SHA256_CTX sha;
|
||||
#else
|
||||
sph_sha256_context sha;
|
||||
#endif
|
||||
sph_sha256_context sha;
|
||||
} myrgr_ctx_holder;
|
||||
|
||||
myrgr_ctx_holder myrgr_ctx;
|
||||
@@ -40,44 +30,37 @@ void init_myrgr_ctx()
|
||||
#else
|
||||
init_groestl (&myrgr_ctx.groestl, 64 );
|
||||
#endif
|
||||
#if defined __SHA__
|
||||
SHA256_Init( &myrgr_ctx.sha );
|
||||
#else
|
||||
sph_sha256_init( &myrgr_ctx.sha );
|
||||
#endif
|
||||
sph_sha256_init(&myrgr_ctx.sha);
|
||||
}
|
||||
|
||||
void myriadhash( void *output, const void *input )
|
||||
void myriad_hash(void *output, const void *input)
|
||||
{
|
||||
myrgr_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &myrgr_ctx, sizeof(myrgr_ctx) );
|
||||
uint32_t hash[16] __attribute__ ((aligned (64)));
|
||||
myrgr_ctx_holder ctx;
|
||||
memcpy( &ctx, &myrgr_ctx, sizeof(myrgr_ctx) );
|
||||
|
||||
uint32_t _ALIGN(32) hash[16];
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512(&ctx.groestl, input, 80);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
sph_groestl512(&ctx.groestl, input, 80);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_and_final_groestl( &ctx.groestl, (char*)input,
|
||||
(const char*)input, 640 );
|
||||
update_groestl( &ctx.groestl, (char*)input, 640 );
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
#endif
|
||||
|
||||
#if defined __SHA__
|
||||
SHA256_Update( &ctx.sha, hash, 64 );
|
||||
SHA256_Final( (unsigned char*) hash, &ctx.sha );
|
||||
#else
|
||||
sph_sha256(&ctx.sha, hash, 64);
|
||||
sph_sha256_close(&ctx.sha, hash);
|
||||
#endif
|
||||
memcpy(output, hash, 32);
|
||||
sph_sha256(&ctx.sha, hash, 64);
|
||||
sph_sha256_close(&ctx.sha, hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_myriad(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
@@ -88,9 +71,9 @@ int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
do {
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8];
|
||||
be32enc(&endiandata[19], nonce);
|
||||
myriadhash(hash, endiandata);
|
||||
myriad_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
pdata[19] = nonce;
|
||||
@@ -105,14 +88,15 @@ int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
bool register_myriad_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | SHA_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT;
|
||||
init_myrgr_ctx();
|
||||
gate->scanhash = (void*)&scanhash_myriad;
|
||||
gate->hash = (void*)&myriadhash;
|
||||
// gate->hash_alt = (void*)&myriadhash;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
||||
*/
|
||||
|
107
algo/groestl/myrgr-4way.c
Normal file
107
algo/groestl/myrgr-4way.c
Normal file
@@ -0,0 +1,107 @@
|
||||
#include "myrgr-gate.h"
|
||||
|
||||
#if defined(MYRGR_4WAY)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "aes_ni/hash-groestl.h"
|
||||
#include "algo/sha/sha2-hash-4way.h"
|
||||
|
||||
typedef struct {
|
||||
hashState_groestl groestl;
|
||||
sha256_4way_context sha;
|
||||
} myrgr_4way_ctx_holder;
|
||||
|
||||
myrgr_4way_ctx_holder myrgr_4way_ctx;
|
||||
|
||||
void init_myrgr_4way_ctx()
|
||||
{
|
||||
init_groestl (&myrgr_4way_ctx.groestl, 64 );
|
||||
sha256_4way_init( &myrgr_4way_ctx.sha );
|
||||
}
|
||||
|
||||
void myriad_4way_hash( void *output, const void *input )
|
||||
{
|
||||
uint32_t hash0[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[20] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
|
||||
myrgr_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &myrgr_4way_ctx, sizeof(myrgr_4way_ctx) );
|
||||
|
||||
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, input, 640 );
|
||||
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 640 );
|
||||
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash1, (char*)hash1, 640 );
|
||||
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash2, (char*)hash2, 640 );
|
||||
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 640 );
|
||||
|
||||
mm_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
sha256_4way( &ctx.sha, vhash, 64 );
|
||||
sha256_4way_close( &ctx.sha, vhash );
|
||||
|
||||
mm_deinterleave_4x32( output, output+32, output+64, output+96,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
|
||||
/*
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
*/
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
|
||||
myriad_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
18
algo/groestl/myrgr-gate.c
Normal file
18
algo/groestl/myrgr-gate.c
Normal file
@@ -0,0 +1,18 @@
|
||||
#include "myrgr-gate.h"
|
||||
|
||||
bool register_myriad_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (MYRGR_4WAY)
|
||||
init_myrgr_4way_ctx();
|
||||
gate->scanhash = (void*)&scanhash_myriad_4way;
|
||||
gate->hash = (void*)&myriad_4way_hash;
|
||||
#else
|
||||
init_myrgr_ctx();
|
||||
gate->scanhash = (void*)&scanhash_myriad;
|
||||
gate->hash = (void*)&myriad_hash;
|
||||
#endif
|
||||
gate->optimizations = AES_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0x3ffff;
|
||||
return true;
|
||||
};
|
||||
|
30
algo/groestl/myrgr-gate.h
Normal file
30
algo/groestl/myrgr-gate.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef MYRGR_GATE_H__
|
||||
#define MYRGR_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define MYRGR_4WAY
|
||||
#endif
|
||||
|
||||
#if defined(MYRGR_4WAY)
|
||||
|
||||
void myriad_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_myrgr_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void myriad_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
void init_myrgr_ctx();
|
||||
|
||||
#endif
|
||||
|
935
algo/hamsi/hamsi-hash-4way.c
Normal file
935
algo/hamsi/hamsi-hash-4way.c
Normal file
@@ -0,0 +1,935 @@
|
||||
/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
|
||||
/*
|
||||
* Hamsi implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
//#include "miner.h"
|
||||
#include "hamsi-hash-4way.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
|
||||
* table lookup during message expansion (1 to 8, inclusive). If we note
|
||||
* w the number of bits per message word (w=32 for Hamsi-224/256, w=64
|
||||
* for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
|
||||
* Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
|
||||
* then we will get t tables (where t=ceil(w/n)) of individual size
|
||||
* 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
|
||||
* n=5, there are 7 tables, but the last one uses only two bits on
|
||||
* input, not five).
|
||||
*
|
||||
* Also, we read t rows of r words from RAM. Words in a given row are
|
||||
* concatenated in RAM in that order, so most of the cost is about
|
||||
* reading the first row word; comparatively, cache misses are thus
|
||||
* less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
|
||||
*
|
||||
* When n=1, tables are "special" in that we omit the first entry of
|
||||
* each table (which always contains 0), so that total table size is
|
||||
* halved.
|
||||
*
|
||||
* We thus have the following (size1 is the cumulative table size of
|
||||
* Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
|
||||
* are for Hamsi-224/256 and Hamsi-384/512, respectively).
|
||||
*
|
||||
* n size1 size2 t1 t2
|
||||
* ---------------------------------------
|
||||
* 1 1024 4096 32 64
|
||||
* 2 2048 8192 16 32
|
||||
* 3 2688 10880 11 22
|
||||
* 4 4096 16384 8 16
|
||||
* 5 6272 25600 7 13
|
||||
* 6 10368 41984 6 11
|
||||
* 7 16896 73856 5 10
|
||||
* 8 32768 131072 4 8
|
||||
*
|
||||
* So there is a trade-off: a lower n makes the tables fit better in
|
||||
* L1 cache, but increases the number of memory accesses. The optimal
|
||||
* value depends on the amount of available L1 cache and the relative
|
||||
* impact of a cache miss.
|
||||
*
|
||||
* Experimentally, in ideal benchmark conditions (which are not necessarily
|
||||
* realistic with regards to L1 cache contention), it seems that n=8 is
|
||||
* the best value on "big" architectures (those with 32 kB or more of L1
|
||||
* cache), while n=4 is better on "small" architectures. This was tested
|
||||
* on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
|
||||
* (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
|
||||
* (8 kB L1 cache).
|
||||
*
|
||||
* Note: with n=1, the 32 tables (actually implemented as one big table)
|
||||
* are read entirely and sequentially, regardless of the input data,
|
||||
* thus avoiding any data-dependent table access pattern.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
//#include "hamsi-helper-4way.c"
|
||||
|
||||
static const sph_u32 IV512[] = {
|
||||
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
|
||||
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
|
||||
SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
|
||||
SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
|
||||
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
|
||||
SPH_C32(0x6769756d)
|
||||
};
|
||||
|
||||
static const sph_u32 alpha_n[] = {
|
||||
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
|
||||
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
|
||||
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
|
||||
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
|
||||
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
|
||||
};
|
||||
|
||||
static const sph_u32 alpha_f[] = {
|
||||
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
|
||||
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
|
||||
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
|
||||
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
|
||||
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
|
||||
};
|
||||
|
||||
// imported from hamsi helper
|
||||
|
||||
/* Note: this table lists bits within each byte from least
|
||||
siginificant to most significant. */
|
||||
static const sph_u32 T512[64][16] = {
|
||||
{ SPH_C32(0xef0b0270), SPH_C32(0x3afd0000), SPH_C32(0x5dae0000),
|
||||
SPH_C32(0x69490000), SPH_C32(0x9b0f3c06), SPH_C32(0x4405b5f9),
|
||||
SPH_C32(0x66140a51), SPH_C32(0x924f5d0a), SPH_C32(0xc96b0030),
|
||||
SPH_C32(0xe7250000), SPH_C32(0x2f840000), SPH_C32(0x264f0000),
|
||||
SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137), SPH_C32(0x509f6984),
|
||||
SPH_C32(0x9e69af68) },
|
||||
{ SPH_C32(0xc96b0030), SPH_C32(0xe7250000), SPH_C32(0x2f840000),
|
||||
SPH_C32(0x264f0000), SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137),
|
||||
SPH_C32(0x509f6984), SPH_C32(0x9e69af68), SPH_C32(0x26600240),
|
||||
SPH_C32(0xddd80000), SPH_C32(0x722a0000), SPH_C32(0x4f060000),
|
||||
SPH_C32(0x936667ff), SPH_C32(0x29f944ce), SPH_C32(0x368b63d5),
|
||||
SPH_C32(0x0c26f262) },
|
||||
{ SPH_C32(0x145a3c00), SPH_C32(0xb9e90000), SPH_C32(0x61270000),
|
||||
SPH_C32(0xf1610000), SPH_C32(0xce613d6c), SPH_C32(0xb0493d78),
|
||||
SPH_C32(0x47a96720), SPH_C32(0xe18e24c5), SPH_C32(0x23671400),
|
||||
SPH_C32(0xc8b90000), SPH_C32(0xf4c70000), SPH_C32(0xfb750000),
|
||||
SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549), SPH_C32(0x02c40a3f),
|
||||
SPH_C32(0xdc24e61f) },
|
||||
{ SPH_C32(0x23671400), SPH_C32(0xc8b90000), SPH_C32(0xf4c70000),
|
||||
SPH_C32(0xfb750000), SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549),
|
||||
SPH_C32(0x02c40a3f), SPH_C32(0xdc24e61f), SPH_C32(0x373d2800),
|
||||
SPH_C32(0x71500000), SPH_C32(0x95e00000), SPH_C32(0x0a140000),
|
||||
SPH_C32(0xbdac1909), SPH_C32(0x48ef9831), SPH_C32(0x456d6d1f),
|
||||
SPH_C32(0x3daac2da) },
|
||||
{ SPH_C32(0x54285c00), SPH_C32(0xeaed0000), SPH_C32(0xc5d60000),
|
||||
SPH_C32(0xa1c50000), SPH_C32(0xb3a26770), SPH_C32(0x94a5c4e1),
|
||||
SPH_C32(0x6bb0419d), SPH_C32(0x551b3782), SPH_C32(0x9cbb1800),
|
||||
SPH_C32(0xb0d30000), SPH_C32(0x92510000), SPH_C32(0xed930000),
|
||||
SPH_C32(0x593a4345), SPH_C32(0xe114d5f4), SPH_C32(0x430633da),
|
||||
SPH_C32(0x78cace29) },
|
||||
{ SPH_C32(0x9cbb1800), SPH_C32(0xb0d30000), SPH_C32(0x92510000),
|
||||
SPH_C32(0xed930000), SPH_C32(0x593a4345), SPH_C32(0xe114d5f4),
|
||||
SPH_C32(0x430633da), SPH_C32(0x78cace29), SPH_C32(0xc8934400),
|
||||
SPH_C32(0x5a3e0000), SPH_C32(0x57870000), SPH_C32(0x4c560000),
|
||||
SPH_C32(0xea982435), SPH_C32(0x75b11115), SPH_C32(0x28b67247),
|
||||
SPH_C32(0x2dd1f9ab) },
|
||||
{ SPH_C32(0x29449c00), SPH_C32(0x64e70000), SPH_C32(0xf24b0000),
|
||||
SPH_C32(0xc2f30000), SPH_C32(0x0ede4e8f), SPH_C32(0x56c23745),
|
||||
SPH_C32(0xf3e04259), SPH_C32(0x8d0d9ec4), SPH_C32(0x466d0c00),
|
||||
SPH_C32(0x08620000), SPH_C32(0xdd5d0000), SPH_C32(0xbadd0000),
|
||||
SPH_C32(0x6a927942), SPH_C32(0x441f2b93), SPH_C32(0x218ace6f),
|
||||
SPH_C32(0xbf2c0be2) },
|
||||
{ SPH_C32(0x466d0c00), SPH_C32(0x08620000), SPH_C32(0xdd5d0000),
|
||||
SPH_C32(0xbadd0000), SPH_C32(0x6a927942), SPH_C32(0x441f2b93),
|
||||
SPH_C32(0x218ace6f), SPH_C32(0xbf2c0be2), SPH_C32(0x6f299000),
|
||||
SPH_C32(0x6c850000), SPH_C32(0x2f160000), SPH_C32(0x782e0000),
|
||||
SPH_C32(0x644c37cd), SPH_C32(0x12dd1cd6), SPH_C32(0xd26a8c36),
|
||||
SPH_C32(0x32219526) },
|
||||
{ SPH_C32(0xf6800005), SPH_C32(0x3443c000), SPH_C32(0x24070000),
|
||||
SPH_C32(0x8f3d0000), SPH_C32(0x21373bfb), SPH_C32(0x0ab8d5ae),
|
||||
SPH_C32(0xcdc58b19), SPH_C32(0xd795ba31), SPH_C32(0xa67f0001),
|
||||
SPH_C32(0x71378000), SPH_C32(0x19fc0000), SPH_C32(0x96db0000),
|
||||
SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3), SPH_C32(0x2c6d478f),
|
||||
SPH_C32(0xac8e6c88) },
|
||||
{ SPH_C32(0xa67f0001), SPH_C32(0x71378000), SPH_C32(0x19fc0000),
|
||||
SPH_C32(0x96db0000), SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3),
|
||||
SPH_C32(0x2c6d478f), SPH_C32(0xac8e6c88), SPH_C32(0x50ff0004),
|
||||
SPH_C32(0x45744000), SPH_C32(0x3dfb0000), SPH_C32(0x19e60000),
|
||||
SPH_C32(0x1bbc5606), SPH_C32(0xe1727b5d), SPH_C32(0xe1a8cc96),
|
||||
SPH_C32(0x7b1bd6b9) },
|
||||
{ SPH_C32(0xf7750009), SPH_C32(0xcf3cc000), SPH_C32(0xc3d60000),
|
||||
SPH_C32(0x04920000), SPH_C32(0x029519a9), SPH_C32(0xf8e836ba),
|
||||
SPH_C32(0x7a87f14e), SPH_C32(0x9e16981a), SPH_C32(0xd46a0000),
|
||||
SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000), SPH_C32(0x4a290000),
|
||||
SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c), SPH_C32(0x98369604),
|
||||
SPH_C32(0xf746c320) },
|
||||
{ SPH_C32(0xd46a0000), SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000),
|
||||
SPH_C32(0x4a290000), SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c),
|
||||
SPH_C32(0x98369604), SPH_C32(0xf746c320), SPH_C32(0x231f0009),
|
||||
SPH_C32(0x42f40000), SPH_C32(0x66790000), SPH_C32(0x4ebb0000),
|
||||
SPH_C32(0xfedb5bd3), SPH_C32(0x315cb0d6), SPH_C32(0xe2b1674a),
|
||||
SPH_C32(0x69505b3a) },
|
||||
{ SPH_C32(0x774400f0), SPH_C32(0xf15a0000), SPH_C32(0xf5b20000),
|
||||
SPH_C32(0x34140000), SPH_C32(0x89377e8c), SPH_C32(0x5a8bec25),
|
||||
SPH_C32(0x0bc3cd1e), SPH_C32(0xcf3775cb), SPH_C32(0xf46c0050),
|
||||
SPH_C32(0x96180000), SPH_C32(0x14a50000), SPH_C32(0x031f0000),
|
||||
SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19), SPH_C32(0x9ca470d2),
|
||||
SPH_C32(0x8a341574) },
|
||||
{ SPH_C32(0xf46c0050), SPH_C32(0x96180000), SPH_C32(0x14a50000),
|
||||
SPH_C32(0x031f0000), SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19),
|
||||
SPH_C32(0x9ca470d2), SPH_C32(0x8a341574), SPH_C32(0x832800a0),
|
||||
SPH_C32(0x67420000), SPH_C32(0xe1170000), SPH_C32(0x370b0000),
|
||||
SPH_C32(0xcba30034), SPH_C32(0x3c34923c), SPH_C32(0x9767bdcc),
|
||||
SPH_C32(0x450360bf) },
|
||||
{ SPH_C32(0xe8870170), SPH_C32(0x9d720000), SPH_C32(0x12db0000),
|
||||
SPH_C32(0xd4220000), SPH_C32(0xf2886b27), SPH_C32(0xa921e543),
|
||||
SPH_C32(0x4ef8b518), SPH_C32(0x618813b1), SPH_C32(0xb4370060),
|
||||
SPH_C32(0x0c4c0000), SPH_C32(0x56c20000), SPH_C32(0x5cae0000),
|
||||
SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825), SPH_C32(0x1b365f3d),
|
||||
SPH_C32(0xf3d45758) },
|
||||
{ SPH_C32(0xb4370060), SPH_C32(0x0c4c0000), SPH_C32(0x56c20000),
|
||||
SPH_C32(0x5cae0000), SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825),
|
||||
SPH_C32(0x1b365f3d), SPH_C32(0xf3d45758), SPH_C32(0x5cb00110),
|
||||
SPH_C32(0x913e0000), SPH_C32(0x44190000), SPH_C32(0x888c0000),
|
||||
SPH_C32(0x66dc7418), SPH_C32(0x921f1d66), SPH_C32(0x55ceea25),
|
||||
SPH_C32(0x925c44e9) },
|
||||
{ SPH_C32(0x0c720000), SPH_C32(0x49e50f00), SPH_C32(0x42790000),
|
||||
SPH_C32(0x5cea0000), SPH_C32(0x33aa301a), SPH_C32(0x15822514),
|
||||
SPH_C32(0x95a34b7b), SPH_C32(0xb44b0090), SPH_C32(0xfe220000),
|
||||
SPH_C32(0xa7580500), SPH_C32(0x25d10000), SPH_C32(0xf7600000),
|
||||
SPH_C32(0x893178da), SPH_C32(0x1fd4f860), SPH_C32(0x4ed0a315),
|
||||
SPH_C32(0xa123ff9f) },
|
||||
{ SPH_C32(0xfe220000), SPH_C32(0xa7580500), SPH_C32(0x25d10000),
|
||||
SPH_C32(0xf7600000), SPH_C32(0x893178da), SPH_C32(0x1fd4f860),
|
||||
SPH_C32(0x4ed0a315), SPH_C32(0xa123ff9f), SPH_C32(0xf2500000),
|
||||
SPH_C32(0xeebd0a00), SPH_C32(0x67a80000), SPH_C32(0xab8a0000),
|
||||
SPH_C32(0xba9b48c0), SPH_C32(0x0a56dd74), SPH_C32(0xdb73e86e),
|
||||
SPH_C32(0x1568ff0f) },
|
||||
{ SPH_C32(0x45180000), SPH_C32(0xa5b51700), SPH_C32(0xf96a0000),
|
||||
SPH_C32(0x3b480000), SPH_C32(0x1ecc142c), SPH_C32(0x231395d6),
|
||||
SPH_C32(0x16bca6b0), SPH_C32(0xdf33f4df), SPH_C32(0xb83d0000),
|
||||
SPH_C32(0x16710600), SPH_C32(0x379a0000), SPH_C32(0xf5b10000),
|
||||
SPH_C32(0x228161ac), SPH_C32(0xae48f145), SPH_C32(0x66241616),
|
||||
SPH_C32(0xc5c1eb3e) },
|
||||
{ SPH_C32(0xb83d0000), SPH_C32(0x16710600), SPH_C32(0x379a0000),
|
||||
SPH_C32(0xf5b10000), SPH_C32(0x228161ac), SPH_C32(0xae48f145),
|
||||
SPH_C32(0x66241616), SPH_C32(0xc5c1eb3e), SPH_C32(0xfd250000),
|
||||
SPH_C32(0xb3c41100), SPH_C32(0xcef00000), SPH_C32(0xcef90000),
|
||||
SPH_C32(0x3c4d7580), SPH_C32(0x8d5b6493), SPH_C32(0x7098b0a6),
|
||||
SPH_C32(0x1af21fe1) },
|
||||
{ SPH_C32(0x75a40000), SPH_C32(0xc28b2700), SPH_C32(0x94a40000),
|
||||
SPH_C32(0x90f50000), SPH_C32(0xfb7857e0), SPH_C32(0x49ce0bae),
|
||||
SPH_C32(0x1767c483), SPH_C32(0xaedf667e), SPH_C32(0xd1660000),
|
||||
SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000), SPH_C32(0xf6940000),
|
||||
SPH_C32(0x03024527), SPH_C32(0xcf70fcf2), SPH_C32(0xb4431b17),
|
||||
SPH_C32(0x857f3c2b) },
|
||||
{ SPH_C32(0xd1660000), SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000),
|
||||
SPH_C32(0xf6940000), SPH_C32(0x03024527), SPH_C32(0xcf70fcf2),
|
||||
SPH_C32(0xb4431b17), SPH_C32(0x857f3c2b), SPH_C32(0xa4c20000),
|
||||
SPH_C32(0xd9372400), SPH_C32(0x0a480000), SPH_C32(0x66610000),
|
||||
SPH_C32(0xf87a12c7), SPH_C32(0x86bef75c), SPH_C32(0xa324df94),
|
||||
SPH_C32(0x2ba05a55) },
|
||||
{ SPH_C32(0x75c90003), SPH_C32(0x0e10c000), SPH_C32(0xd1200000),
|
||||
SPH_C32(0xbaea0000), SPH_C32(0x8bc42f3e), SPH_C32(0x8758b757),
|
||||
SPH_C32(0xbb28761d), SPH_C32(0x00b72e2b), SPH_C32(0xeecf0001),
|
||||
SPH_C32(0x6f564000), SPH_C32(0xf33e0000), SPH_C32(0xa79e0000),
|
||||
SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5), SPH_C32(0x4a3b40ba),
|
||||
SPH_C32(0xfeabf254) },
|
||||
{ SPH_C32(0xeecf0001), SPH_C32(0x6f564000), SPH_C32(0xf33e0000),
|
||||
SPH_C32(0xa79e0000), SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5),
|
||||
SPH_C32(0x4a3b40ba), SPH_C32(0xfeabf254), SPH_C32(0x9b060002),
|
||||
SPH_C32(0x61468000), SPH_C32(0x221e0000), SPH_C32(0x1d740000),
|
||||
SPH_C32(0x36715d27), SPH_C32(0x30495c92), SPH_C32(0xf11336a7),
|
||||
SPH_C32(0xfe1cdc7f) },
|
||||
{ SPH_C32(0x86790000), SPH_C32(0x3f390002), SPH_C32(0xe19ae000),
|
||||
SPH_C32(0x98560000), SPH_C32(0x9565670e), SPH_C32(0x4e88c8ea),
|
||||
SPH_C32(0xd3dd4944), SPH_C32(0x161ddab9), SPH_C32(0x30b70000),
|
||||
SPH_C32(0xe5d00000), SPH_C32(0xf4f46000), SPH_C32(0x42c40000),
|
||||
SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460), SPH_C32(0x21afa1ea),
|
||||
SPH_C32(0xb0a51834) },
|
||||
{ SPH_C32(0x30b70000), SPH_C32(0xe5d00000), SPH_C32(0xf4f46000),
|
||||
SPH_C32(0x42c40000), SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460),
|
||||
SPH_C32(0x21afa1ea), SPH_C32(0xb0a51834), SPH_C32(0xb6ce0000),
|
||||
SPH_C32(0xdae90002), SPH_C32(0x156e8000), SPH_C32(0xda920000),
|
||||
SPH_C32(0xf6dd5a64), SPH_C32(0x36325c8a), SPH_C32(0xf272e8ae),
|
||||
SPH_C32(0xa6b8c28d) },
|
||||
{ SPH_C32(0x14190000), SPH_C32(0x23ca003c), SPH_C32(0x50df0000),
|
||||
SPH_C32(0x44b60000), SPH_C32(0x1b6c67b0), SPH_C32(0x3cf3ac75),
|
||||
SPH_C32(0x61e610b0), SPH_C32(0xdbcadb80), SPH_C32(0xe3430000),
|
||||
SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000), SPH_C32(0xaa4e0000),
|
||||
SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15), SPH_C32(0x123db156),
|
||||
SPH_C32(0x3a4e99d7) },
|
||||
{ SPH_C32(0xe3430000), SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000),
|
||||
SPH_C32(0xaa4e0000), SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15),
|
||||
SPH_C32(0x123db156), SPH_C32(0x3a4e99d7), SPH_C32(0xf75a0000),
|
||||
SPH_C32(0x19840028), SPH_C32(0xa2190000), SPH_C32(0xeef80000),
|
||||
SPH_C32(0xc0722516), SPH_C32(0x19981260), SPH_C32(0x73dba1e6),
|
||||
SPH_C32(0xe1844257) },
|
||||
{ SPH_C32(0x54500000), SPH_C32(0x0671005c), SPH_C32(0x25ae0000),
|
||||
SPH_C32(0x6a1e0000), SPH_C32(0x2ea54edf), SPH_C32(0x664e8512),
|
||||
SPH_C32(0xbfba18c3), SPH_C32(0x7e715d17), SPH_C32(0xbc8d0000),
|
||||
SPH_C32(0xfc3b0018), SPH_C32(0x19830000), SPH_C32(0xd10b0000),
|
||||
SPH_C32(0xae1878c4), SPH_C32(0x42a69856), SPH_C32(0x0012da37),
|
||||
SPH_C32(0x2c3b504e) },
|
||||
{ SPH_C32(0xbc8d0000), SPH_C32(0xfc3b0018), SPH_C32(0x19830000),
|
||||
SPH_C32(0xd10b0000), SPH_C32(0xae1878c4), SPH_C32(0x42a69856),
|
||||
SPH_C32(0x0012da37), SPH_C32(0x2c3b504e), SPH_C32(0xe8dd0000),
|
||||
SPH_C32(0xfa4a0044), SPH_C32(0x3c2d0000), SPH_C32(0xbb150000),
|
||||
SPH_C32(0x80bd361b), SPH_C32(0x24e81d44), SPH_C32(0xbfa8c2f4),
|
||||
SPH_C32(0x524a0d59) },
|
||||
{ SPH_C32(0x69510000), SPH_C32(0xd4e1009c), SPH_C32(0xc3230000),
|
||||
SPH_C32(0xac2f0000), SPH_C32(0xe4950bae), SPH_C32(0xcea415dc),
|
||||
SPH_C32(0x87ec287c), SPH_C32(0xbce1a3ce), SPH_C32(0xc6730000),
|
||||
SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000), SPH_C32(0x218d0000),
|
||||
SPH_C32(0x23111587), SPH_C32(0x7913512f), SPH_C32(0x1d28ac88),
|
||||
SPH_C32(0x378dd173) },
|
||||
{ SPH_C32(0xc6730000), SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000),
|
||||
SPH_C32(0x218d0000), SPH_C32(0x23111587), SPH_C32(0x7913512f),
|
||||
SPH_C32(0x1d28ac88), SPH_C32(0x378dd173), SPH_C32(0xaf220000),
|
||||
SPH_C32(0x7b6c0090), SPH_C32(0x67e20000), SPH_C32(0x8da20000),
|
||||
SPH_C32(0xc7841e29), SPH_C32(0xb7b744f3), SPH_C32(0x9ac484f4),
|
||||
SPH_C32(0x8b6c72bd) },
|
||||
{ SPH_C32(0xcc140000), SPH_C32(0xa5630000), SPH_C32(0x5ab90780),
|
||||
SPH_C32(0x3b500000), SPH_C32(0x4bd013ff), SPH_C32(0x879b3418),
|
||||
SPH_C32(0x694348c1), SPH_C32(0xca5a87fe), SPH_C32(0x819e0000),
|
||||
SPH_C32(0xec570000), SPH_C32(0x66320280), SPH_C32(0x95f30000),
|
||||
SPH_C32(0x5da92802), SPH_C32(0x48f43cbc), SPH_C32(0xe65aa22d),
|
||||
SPH_C32(0x8e67b7fa) },
|
||||
{ SPH_C32(0x819e0000), SPH_C32(0xec570000), SPH_C32(0x66320280),
|
||||
SPH_C32(0x95f30000), SPH_C32(0x5da92802), SPH_C32(0x48f43cbc),
|
||||
SPH_C32(0xe65aa22d), SPH_C32(0x8e67b7fa), SPH_C32(0x4d8a0000),
|
||||
SPH_C32(0x49340000), SPH_C32(0x3c8b0500), SPH_C32(0xaea30000),
|
||||
SPH_C32(0x16793bfd), SPH_C32(0xcf6f08a4), SPH_C32(0x8f19eaec),
|
||||
SPH_C32(0x443d3004) },
|
||||
{ SPH_C32(0x78230000), SPH_C32(0x12fc0000), SPH_C32(0xa93a0b80),
|
||||
SPH_C32(0x90a50000), SPH_C32(0x713e2879), SPH_C32(0x7ee98924),
|
||||
SPH_C32(0xf08ca062), SPH_C32(0x636f8bab), SPH_C32(0x02af0000),
|
||||
SPH_C32(0xb7280000), SPH_C32(0xba1c0300), SPH_C32(0x56980000),
|
||||
SPH_C32(0xba8d45d3), SPH_C32(0x8048c667), SPH_C32(0xa95c149a),
|
||||
SPH_C32(0xf4f6ea7b) },
|
||||
{ SPH_C32(0x02af0000), SPH_C32(0xb7280000), SPH_C32(0xba1c0300),
|
||||
SPH_C32(0x56980000), SPH_C32(0xba8d45d3), SPH_C32(0x8048c667),
|
||||
SPH_C32(0xa95c149a), SPH_C32(0xf4f6ea7b), SPH_C32(0x7a8c0000),
|
||||
SPH_C32(0xa5d40000), SPH_C32(0x13260880), SPH_C32(0xc63d0000),
|
||||
SPH_C32(0xcbb36daa), SPH_C32(0xfea14f43), SPH_C32(0x59d0b4f8),
|
||||
SPH_C32(0x979961d0) },
|
||||
{ SPH_C32(0xac480000), SPH_C32(0x1ba60000), SPH_C32(0x45fb1380),
|
||||
SPH_C32(0x03430000), SPH_C32(0x5a85316a), SPH_C32(0x1fb250b6),
|
||||
SPH_C32(0xfe72c7fe), SPH_C32(0x91e478f6), SPH_C32(0x1e4e0000),
|
||||
SPH_C32(0xdecf0000), SPH_C32(0x6df80180), SPH_C32(0x77240000),
|
||||
SPH_C32(0xec47079e), SPH_C32(0xf4a0694e), SPH_C32(0xcda31812),
|
||||
SPH_C32(0x98aa496e) },
|
||||
{ SPH_C32(0x1e4e0000), SPH_C32(0xdecf0000), SPH_C32(0x6df80180),
|
||||
SPH_C32(0x77240000), SPH_C32(0xec47079e), SPH_C32(0xf4a0694e),
|
||||
SPH_C32(0xcda31812), SPH_C32(0x98aa496e), SPH_C32(0xb2060000),
|
||||
SPH_C32(0xc5690000), SPH_C32(0x28031200), SPH_C32(0x74670000),
|
||||
SPH_C32(0xb6c236f4), SPH_C32(0xeb1239f8), SPH_C32(0x33d1dfec),
|
||||
SPH_C32(0x094e3198) },
|
||||
{ SPH_C32(0xaec30000), SPH_C32(0x9c4f0001), SPH_C32(0x79d1e000),
|
||||
SPH_C32(0x2c150000), SPH_C32(0x45cc75b3), SPH_C32(0x6650b736),
|
||||
SPH_C32(0xab92f78f), SPH_C32(0xa312567b), SPH_C32(0xdb250000),
|
||||
SPH_C32(0x09290000), SPH_C32(0x49aac000), SPH_C32(0x81e10000),
|
||||
SPH_C32(0xcafe6b59), SPH_C32(0x42793431), SPH_C32(0x43566b76),
|
||||
SPH_C32(0xe86cba2e) },
|
||||
{ SPH_C32(0xdb250000), SPH_C32(0x09290000), SPH_C32(0x49aac000),
|
||||
SPH_C32(0x81e10000), SPH_C32(0xcafe6b59), SPH_C32(0x42793431),
|
||||
SPH_C32(0x43566b76), SPH_C32(0xe86cba2e), SPH_C32(0x75e60000),
|
||||
SPH_C32(0x95660001), SPH_C32(0x307b2000), SPH_C32(0xadf40000),
|
||||
SPH_C32(0x8f321eea), SPH_C32(0x24298307), SPH_C32(0xe8c49cf9),
|
||||
SPH_C32(0x4b7eec55) },
|
||||
{ SPH_C32(0x58430000), SPH_C32(0x807e0000), SPH_C32(0x78330001),
|
||||
SPH_C32(0xc66b3800), SPH_C32(0xe7375cdc), SPH_C32(0x79ad3fdd),
|
||||
SPH_C32(0xac73fe6f), SPH_C32(0x3a4479b1), SPH_C32(0x1d5a0000),
|
||||
SPH_C32(0x2b720000), SPH_C32(0x488d0000), SPH_C32(0xaf611800),
|
||||
SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0), SPH_C32(0x81a20429),
|
||||
SPH_C32(0x1e7536a6) },
|
||||
{ SPH_C32(0x1d5a0000), SPH_C32(0x2b720000), SPH_C32(0x488d0000),
|
||||
SPH_C32(0xaf611800), SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0),
|
||||
SPH_C32(0x81a20429), SPH_C32(0x1e7536a6), SPH_C32(0x45190000),
|
||||
SPH_C32(0xab0c0000), SPH_C32(0x30be0001), SPH_C32(0x690a2000),
|
||||
SPH_C32(0xc2fc7219), SPH_C32(0xb1d4800d), SPH_C32(0x2dd1fa46),
|
||||
SPH_C32(0x24314f17) },
|
||||
{ SPH_C32(0xa53b0000), SPH_C32(0x14260000), SPH_C32(0x4e30001e),
|
||||
SPH_C32(0x7cae0000), SPH_C32(0x8f9e0dd5), SPH_C32(0x78dfaa3d),
|
||||
SPH_C32(0xf73168d8), SPH_C32(0x0b1b4946), SPH_C32(0x07ed0000),
|
||||
SPH_C32(0xb2500000), SPH_C32(0x8774000a), SPH_C32(0x970d0000),
|
||||
SPH_C32(0x437223ae), SPH_C32(0x48c76ea4), SPH_C32(0xf4786222),
|
||||
SPH_C32(0x9075b1ce) },
|
||||
{ SPH_C32(0x07ed0000), SPH_C32(0xb2500000), SPH_C32(0x8774000a),
|
||||
SPH_C32(0x970d0000), SPH_C32(0x437223ae), SPH_C32(0x48c76ea4),
|
||||
SPH_C32(0xf4786222), SPH_C32(0x9075b1ce), SPH_C32(0xa2d60000),
|
||||
SPH_C32(0xa6760000), SPH_C32(0xc9440014), SPH_C32(0xeba30000),
|
||||
SPH_C32(0xccec2e7b), SPH_C32(0x3018c499), SPH_C32(0x03490afa),
|
||||
SPH_C32(0x9b6ef888) },
|
||||
{ SPH_C32(0x88980000), SPH_C32(0x1f940000), SPH_C32(0x7fcf002e),
|
||||
SPH_C32(0xfb4e0000), SPH_C32(0xf158079a), SPH_C32(0x61ae9167),
|
||||
SPH_C32(0xa895706c), SPH_C32(0xe6107494), SPH_C32(0x0bc20000),
|
||||
SPH_C32(0xdb630000), SPH_C32(0x7e88000c), SPH_C32(0x15860000),
|
||||
SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43), SPH_C32(0xf460449e),
|
||||
SPH_C32(0xd8b61463) },
|
||||
{ SPH_C32(0x0bc20000), SPH_C32(0xdb630000), SPH_C32(0x7e88000c),
|
||||
SPH_C32(0x15860000), SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43),
|
||||
SPH_C32(0xf460449e), SPH_C32(0xd8b61463), SPH_C32(0x835a0000),
|
||||
SPH_C32(0xc4f70000), SPH_C32(0x01470022), SPH_C32(0xeec80000),
|
||||
SPH_C32(0x60a54f69), SPH_C32(0x142f2a24), SPH_C32(0x5cf534f2),
|
||||
SPH_C32(0x3ea660f7) },
|
||||
{ SPH_C32(0x52500000), SPH_C32(0x29540000), SPH_C32(0x6a61004e),
|
||||
SPH_C32(0xf0ff0000), SPH_C32(0x9a317eec), SPH_C32(0x452341ce),
|
||||
SPH_C32(0xcf568fe5), SPH_C32(0x5303130f), SPH_C32(0x538d0000),
|
||||
SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006), SPH_C32(0x56ff0000),
|
||||
SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9), SPH_C32(0xa9444018),
|
||||
SPH_C32(0x7f975691) },
|
||||
{ SPH_C32(0x538d0000), SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006),
|
||||
SPH_C32(0x56ff0000), SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9),
|
||||
SPH_C32(0xa9444018), SPH_C32(0x7f975691), SPH_C32(0x01dd0000),
|
||||
SPH_C32(0x80a80000), SPH_C32(0xf4960048), SPH_C32(0xa6000000),
|
||||
SPH_C32(0x90d57ea2), SPH_C32(0xd7e68c37), SPH_C32(0x6612cffd),
|
||||
SPH_C32(0x2c94459e) },
|
||||
{ SPH_C32(0xe6280000), SPH_C32(0x4c4b0000), SPH_C32(0xa8550000),
|
||||
SPH_C32(0xd3d002e0), SPH_C32(0xd86130b8), SPH_C32(0x98a7b0da),
|
||||
SPH_C32(0x289506b4), SPH_C32(0xd75a4897), SPH_C32(0xf0c50000),
|
||||
SPH_C32(0x59230000), SPH_C32(0x45820000), SPH_C32(0xe18d00c0),
|
||||
SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699), SPH_C32(0xcbe0fe1c),
|
||||
SPH_C32(0x56a7b19f) },
|
||||
{ SPH_C32(0xf0c50000), SPH_C32(0x59230000), SPH_C32(0x45820000),
|
||||
SPH_C32(0xe18d00c0), SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699),
|
||||
SPH_C32(0xcbe0fe1c), SPH_C32(0x56a7b19f), SPH_C32(0x16ed0000),
|
||||
SPH_C32(0x15680000), SPH_C32(0xedd70000), SPH_C32(0x325d0220),
|
||||
SPH_C32(0xe30c3689), SPH_C32(0x5a4ae643), SPH_C32(0xe375f8a8),
|
||||
SPH_C32(0x81fdf908) },
|
||||
{ SPH_C32(0xb4310000), SPH_C32(0x77330000), SPH_C32(0xb15d0000),
|
||||
SPH_C32(0x7fd004e0), SPH_C32(0x78a26138), SPH_C32(0xd116c35d),
|
||||
SPH_C32(0xd256d489), SPH_C32(0x4e6f74de), SPH_C32(0xe3060000),
|
||||
SPH_C32(0xbdc10000), SPH_C32(0x87130000), SPH_C32(0xbff20060),
|
||||
SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751), SPH_C32(0x73c5ab06),
|
||||
SPH_C32(0x5bd61539) },
|
||||
{ SPH_C32(0xe3060000), SPH_C32(0xbdc10000), SPH_C32(0x87130000),
|
||||
SPH_C32(0xbff20060), SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751),
|
||||
SPH_C32(0x73c5ab06), SPH_C32(0x5bd61539), SPH_C32(0x57370000),
|
||||
SPH_C32(0xcaf20000), SPH_C32(0x364e0000), SPH_C32(0xc0220480),
|
||||
SPH_C32(0x56186b22), SPH_C32(0x5ca3f40c), SPH_C32(0xa1937f8f),
|
||||
SPH_C32(0x15b961e7) },
|
||||
{ SPH_C32(0x02f20000), SPH_C32(0xa2810000), SPH_C32(0x873f0000),
|
||||
SPH_C32(0xe36c7800), SPH_C32(0x1e1d74ef), SPH_C32(0x073d2bd6),
|
||||
SPH_C32(0xc4c23237), SPH_C32(0x7f32259e), SPH_C32(0xbadd0000),
|
||||
SPH_C32(0x13ad0000), SPH_C32(0xb7e70000), SPH_C32(0xf7282800),
|
||||
SPH_C32(0xdf45144d), SPH_C32(0x361ac33a), SPH_C32(0xea5a8d14),
|
||||
SPH_C32(0x2a2c18f0) },
|
||||
{ SPH_C32(0xbadd0000), SPH_C32(0x13ad0000), SPH_C32(0xb7e70000),
|
||||
SPH_C32(0xf7282800), SPH_C32(0xdf45144d), SPH_C32(0x361ac33a),
|
||||
SPH_C32(0xea5a8d14), SPH_C32(0x2a2c18f0), SPH_C32(0xb82f0000),
|
||||
SPH_C32(0xb12c0000), SPH_C32(0x30d80000), SPH_C32(0x14445000),
|
||||
SPH_C32(0xc15860a2), SPH_C32(0x3127e8ec), SPH_C32(0x2e98bf23),
|
||||
SPH_C32(0x551e3d6e) },
|
||||
{ SPH_C32(0x1e6c0000), SPH_C32(0xc4420000), SPH_C32(0x8a2e0000),
|
||||
SPH_C32(0xbcb6b800), SPH_C32(0x2c4413b6), SPH_C32(0x8bfdd3da),
|
||||
SPH_C32(0x6a0c1bc8), SPH_C32(0xb99dc2eb), SPH_C32(0x92560000),
|
||||
SPH_C32(0x1eda0000), SPH_C32(0xea510000), SPH_C32(0xe8b13000),
|
||||
SPH_C32(0xa93556a5), SPH_C32(0xebfb6199), SPH_C32(0xb15c2254),
|
||||
SPH_C32(0x33c5244f) },
|
||||
{ SPH_C32(0x92560000), SPH_C32(0x1eda0000), SPH_C32(0xea510000),
|
||||
SPH_C32(0xe8b13000), SPH_C32(0xa93556a5), SPH_C32(0xebfb6199),
|
||||
SPH_C32(0xb15c2254), SPH_C32(0x33c5244f), SPH_C32(0x8c3a0000),
|
||||
SPH_C32(0xda980000), SPH_C32(0x607f0000), SPH_C32(0x54078800),
|
||||
SPH_C32(0x85714513), SPH_C32(0x6006b243), SPH_C32(0xdb50399c),
|
||||
SPH_C32(0x8a58e6a4) },
|
||||
{ SPH_C32(0x033d0000), SPH_C32(0x08b30000), SPH_C32(0xf33a0000),
|
||||
SPH_C32(0x3ac20007), SPH_C32(0x51298a50), SPH_C32(0x6b6e661f),
|
||||
SPH_C32(0x0ea5cfe3), SPH_C32(0xe6da7ffe), SPH_C32(0xa8da0000),
|
||||
SPH_C32(0x96be0000), SPH_C32(0x5c1d0000), SPH_C32(0x07da0002),
|
||||
SPH_C32(0x7d669583), SPH_C32(0x1f98708a), SPH_C32(0xbb668808),
|
||||
SPH_C32(0xda878000) },
|
||||
{ SPH_C32(0xa8da0000), SPH_C32(0x96be0000), SPH_C32(0x5c1d0000),
|
||||
SPH_C32(0x07da0002), SPH_C32(0x7d669583), SPH_C32(0x1f98708a),
|
||||
SPH_C32(0xbb668808), SPH_C32(0xda878000), SPH_C32(0xabe70000),
|
||||
SPH_C32(0x9e0d0000), SPH_C32(0xaf270000), SPH_C32(0x3d180005),
|
||||
SPH_C32(0x2c4f1fd3), SPH_C32(0x74f61695), SPH_C32(0xb5c347eb),
|
||||
SPH_C32(0x3c5dfffe) },
|
||||
{ SPH_C32(0x01930000), SPH_C32(0xe7820000), SPH_C32(0xedfb0000),
|
||||
SPH_C32(0xcf0c000b), SPH_C32(0x8dd08d58), SPH_C32(0xbca3b42e),
|
||||
SPH_C32(0x063661e1), SPH_C32(0x536f9e7b), SPH_C32(0x92280000),
|
||||
SPH_C32(0xdc850000), SPH_C32(0x57fa0000), SPH_C32(0x56dc0003),
|
||||
SPH_C32(0xbae92316), SPH_C32(0x5aefa30c), SPH_C32(0x90cef752),
|
||||
SPH_C32(0x7b1675d7) },
|
||||
{ SPH_C32(0x92280000), SPH_C32(0xdc850000), SPH_C32(0x57fa0000),
|
||||
SPH_C32(0x56dc0003), SPH_C32(0xbae92316), SPH_C32(0x5aefa30c),
|
||||
SPH_C32(0x90cef752), SPH_C32(0x7b1675d7), SPH_C32(0x93bb0000),
|
||||
SPH_C32(0x3b070000), SPH_C32(0xba010000), SPH_C32(0x99d00008),
|
||||
SPH_C32(0x3739ae4e), SPH_C32(0xe64c1722), SPH_C32(0x96f896b3),
|
||||
SPH_C32(0x2879ebac) },
|
||||
{ SPH_C32(0x5fa80000), SPH_C32(0x56030000), SPH_C32(0x43ae0000),
|
||||
SPH_C32(0x64f30013), SPH_C32(0x257e86bf), SPH_C32(0x1311944e),
|
||||
SPH_C32(0x541e95bf), SPH_C32(0x8ea4db69), SPH_C32(0x00440000),
|
||||
SPH_C32(0x7f480000), SPH_C32(0xda7c0000), SPH_C32(0x2a230001),
|
||||
SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87), SPH_C32(0x030a9e60),
|
||||
SPH_C32(0xbe0a679e) },
|
||||
{ SPH_C32(0x00440000), SPH_C32(0x7f480000), SPH_C32(0xda7c0000),
|
||||
SPH_C32(0x2a230001), SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87),
|
||||
SPH_C32(0x030a9e60), SPH_C32(0xbe0a679e), SPH_C32(0x5fec0000),
|
||||
SPH_C32(0x294b0000), SPH_C32(0x99d20000), SPH_C32(0x4ed00012),
|
||||
SPH_C32(0x1ed34f73), SPH_C32(0xbaa708c9), SPH_C32(0x57140bdf),
|
||||
SPH_C32(0x30aebcf7) },
|
||||
{ SPH_C32(0xee930000), SPH_C32(0xd6070000), SPH_C32(0x92c10000),
|
||||
SPH_C32(0x2b9801e0), SPH_C32(0x9451287c), SPH_C32(0x3b6cfb57),
|
||||
SPH_C32(0x45312374), SPH_C32(0x201f6a64), SPH_C32(0x7b280000),
|
||||
SPH_C32(0x57420000), SPH_C32(0xa9e50000), SPH_C32(0x634300a0),
|
||||
SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb), SPH_C32(0x27f83b03),
|
||||
SPH_C32(0xc7ff60f0) },
|
||||
{ SPH_C32(0x7b280000), SPH_C32(0x57420000), SPH_C32(0xa9e50000),
|
||||
SPH_C32(0x634300a0), SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb),
|
||||
SPH_C32(0x27f83b03), SPH_C32(0xc7ff60f0), SPH_C32(0x95bb0000),
|
||||
SPH_C32(0x81450000), SPH_C32(0x3b240000), SPH_C32(0x48db0140),
|
||||
SPH_C32(0x0a8a6c53), SPH_C32(0x56f56eec), SPH_C32(0x62c91877),
|
||||
SPH_C32(0xe7e00a94) }
|
||||
};
|
||||
|
||||
#define INPUT_BIG \
|
||||
do { \
|
||||
__m256i db = *buf; \
|
||||
const sph_u32 *tp = &T512[0][0]; \
|
||||
m0 = m256_zero; \
|
||||
m1 = m256_zero; \
|
||||
m2 = m256_zero; \
|
||||
m3 = m256_zero; \
|
||||
m4 = m256_zero; \
|
||||
m5 = m256_zero; \
|
||||
m6 = m256_zero; \
|
||||
m7 = m256_zero; \
|
||||
for ( int u = 0; u < 64; u++ ) \
|
||||
{ \
|
||||
__m256i dm = _mm256_and_si256( db, m256_one_64 ) ; \
|
||||
dm = mm256_negate_32( _mm256_or_si256( dm, \
|
||||
_mm256_slli_epi64( dm, 32 ) ) ); \
|
||||
m0 = _mm256_xor_si256( m0, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0x1], tp[0x0], tp[0x1], tp[0x0], \
|
||||
tp[0x1], tp[0x0], tp[0x1], tp[0x0] ) ) ); \
|
||||
m1 = _mm256_xor_si256( m1, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0x3], tp[0x2], tp[0x3], tp[0x2], \
|
||||
tp[0x3], tp[0x2], tp[0x3], tp[0x2] ) ) ); \
|
||||
m2 = _mm256_xor_si256( m2, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0x5], tp[0x4], tp[0x5], tp[0x4], \
|
||||
tp[0x5], tp[0x4], tp[0x5], tp[0x4] ) ) ); \
|
||||
m3 = _mm256_xor_si256( m3, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0x7], tp[0x6], tp[0x7], tp[0x6], \
|
||||
tp[0x7], tp[0x6], tp[0x7], tp[0x6] ) ) ); \
|
||||
m4 = _mm256_xor_si256( m4, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0x9], tp[0x8], tp[0x9], tp[0x8], \
|
||||
tp[0x9], tp[0x8], tp[0x9], tp[0x8] ) ) ); \
|
||||
m5 = _mm256_xor_si256( m5, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0xB], tp[0xA], tp[0xB], tp[0xA], \
|
||||
tp[0xB], tp[0xA], tp[0xB], tp[0xA] ) ) ); \
|
||||
m6 = _mm256_xor_si256( m6, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0xD], tp[0xC], tp[0xD], tp[0xC], \
|
||||
tp[0xD], tp[0xC], tp[0xD], tp[0xC] ) ) ); \
|
||||
m7 = _mm256_xor_si256( m7, _mm256_and_si256( dm, \
|
||||
_mm256_set_epi32( tp[0xF], tp[0xE], tp[0xF], tp[0xE], \
|
||||
tp[0xF], tp[0xE], tp[0xF], tp[0xE] ) ) ); \
|
||||
tp += 0x10; \
|
||||
db = _mm256_srli_epi64( db, 1 ); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define SBOX( a, b, c, d ) \
|
||||
do { \
|
||||
__m256i t; \
|
||||
t = a; \
|
||||
a = _mm256_and_si256( a, c ); \
|
||||
a = _mm256_xor_si256( a, d ); \
|
||||
c = _mm256_xor_si256( c, b ); \
|
||||
c = _mm256_xor_si256( c, a ); \
|
||||
d = _mm256_or_si256( d, t ); \
|
||||
d = _mm256_xor_si256( d, b ); \
|
||||
t = _mm256_xor_si256( t, c ); \
|
||||
b = d; \
|
||||
d = _mm256_or_si256( d, t ); \
|
||||
d = _mm256_xor_si256( d, a ); \
|
||||
a = _mm256_and_si256( a, b ); \
|
||||
t = _mm256_xor_si256( t, a ); \
|
||||
b = _mm256_xor_si256( b, d ); \
|
||||
b = _mm256_xor_si256( b, t ); \
|
||||
a = c; \
|
||||
c = b; \
|
||||
b = d; \
|
||||
d = mm256_not( t ); \
|
||||
} while (0)
|
||||
|
||||
#define L( a, b, c, d ) \
|
||||
do { \
|
||||
a = mm256_rotl_32( a, 13 ); \
|
||||
c = mm256_rotl_32( c, 3 ); \
|
||||
b = _mm256_xor_si256( b, _mm256_xor_si256( a, c ) ); \
|
||||
d = _mm256_xor_si256( d, _mm256_xor_si256( c, \
|
||||
_mm256_slli_epi32( a, 3 ) ) ); \
|
||||
b = mm256_rotl_32( b, 1 ); \
|
||||
d = mm256_rotl_32( d, 7 ); \
|
||||
a = _mm256_xor_si256( a, _mm256_xor_si256( b, d ) ); \
|
||||
c = _mm256_xor_si256( c, _mm256_xor_si256( d, \
|
||||
_mm256_slli_epi32( b, 7 ) ) ); \
|
||||
a = mm256_rotl_32( a, 5 ); \
|
||||
c = mm256_rotl_32( c, 22 ); \
|
||||
} while (0)
|
||||
|
||||
#define DECL_STATE_BIG \
|
||||
__m256i c0, c1, c2, c3, c4, c5, c6, c7; \
|
||||
|
||||
#define READ_STATE_BIG(sc) \
|
||||
do { \
|
||||
c0 = sc->h[0x0]; \
|
||||
c1 = sc->h[0x1]; \
|
||||
c2 = sc->h[0x2]; \
|
||||
c3 = sc->h[0x3]; \
|
||||
c4 = sc->h[0x4]; \
|
||||
c5 = sc->h[0x5]; \
|
||||
c6 = sc->h[0x6]; \
|
||||
c7 = sc->h[0x7]; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE_BIG(sc) \
|
||||
do { \
|
||||
sc->h[0x0] = c0; \
|
||||
sc->h[0x1] = c1; \
|
||||
sc->h[0x2] = c2; \
|
||||
sc->h[0x3] = c3; \
|
||||
sc->h[0x4] = c4; \
|
||||
sc->h[0x5] = c5; \
|
||||
sc->h[0x6] = c6; \
|
||||
sc->h[0x7] = c7; \
|
||||
} while (0)
|
||||
|
||||
#define s0 m0
|
||||
#define s1 c0
|
||||
#define s2 m1
|
||||
#define s3 c1
|
||||
#define s4 c2
|
||||
#define s5 m2
|
||||
#define s6 c3
|
||||
#define s7 m3
|
||||
#define s8 m4
|
||||
#define s9 c4
|
||||
#define sA m5
|
||||
#define sB c5
|
||||
#define sC c6
|
||||
#define sD m6
|
||||
#define sE c7
|
||||
#define sF m7
|
||||
|
||||
#define ROUND_BIG(rc, alpha) \
|
||||
do { \
|
||||
__m256i t0, t1, t2, t3; \
|
||||
s0 = _mm256_xor_si256( s0, _mm256_set_epi32( \
|
||||
alpha[0x01] ^ (rc), alpha[0x00], alpha[0x01] ^ (rc), alpha[0x00], \
|
||||
alpha[0x01] ^ (rc), alpha[0x00], alpha[0x01] ^ (rc), alpha[0x00] ) ); \
|
||||
s1 = _mm256_xor_si256( s1, _mm256_set_epi32( \
|
||||
alpha[0x03], alpha[0x02], alpha[0x03], alpha[0x02], \
|
||||
alpha[0x03], alpha[0x02], alpha[0x03], alpha[0x02] ) ); \
|
||||
s2 = _mm256_xor_si256( s2, _mm256_set_epi32( \
|
||||
alpha[0x05], alpha[0x04], alpha[0x05], alpha[0x04], \
|
||||
alpha[0x05], alpha[0x04], alpha[0x05], alpha[0x04] ) ); \
|
||||
s3 = _mm256_xor_si256( s3, _mm256_set_epi32( \
|
||||
alpha[0x07], alpha[0x06], alpha[0x07], alpha[0x06], \
|
||||
alpha[0x07], alpha[0x06], alpha[0x07], alpha[0x06] ) ); \
|
||||
s4 = _mm256_xor_si256( s4, _mm256_set_epi32( \
|
||||
alpha[0x09], alpha[0x08], alpha[0x09], alpha[0x08], \
|
||||
alpha[0x09], alpha[0x08], alpha[0x09], alpha[0x08] ) ); \
|
||||
s5 = _mm256_xor_si256( s5, _mm256_set_epi32( \
|
||||
alpha[0x0B], alpha[0x0A], alpha[0x0B], alpha[0x0A], \
|
||||
alpha[0x0B], alpha[0x0A], alpha[0x0B], alpha[0x0A] ) ); \
|
||||
s6 = _mm256_xor_si256( s6, _mm256_set_epi32( \
|
||||
alpha[0x0D], alpha[0x0C], alpha[0x0D], alpha[0x0C], \
|
||||
alpha[0x0D], alpha[0x0C], alpha[0x0D], alpha[0x0C] ) ); \
|
||||
s7 = _mm256_xor_si256( s7, _mm256_set_epi32( \
|
||||
alpha[0x0F], alpha[0x0E], alpha[0x0F], alpha[0x0E], \
|
||||
alpha[0x0F], alpha[0x0E], alpha[0x0F], alpha[0x0E] ) ); \
|
||||
s8 = _mm256_xor_si256( s8, _mm256_set_epi32( \
|
||||
alpha[0x11], alpha[0x10], alpha[0x11], alpha[0x10], \
|
||||
alpha[0x11], alpha[0x10], alpha[0x11], alpha[0x10] ) ); \
|
||||
s9 = _mm256_xor_si256( s9, _mm256_set_epi32( \
|
||||
alpha[0x13], alpha[0x12], alpha[0x13], alpha[0x12], \
|
||||
alpha[0x13], alpha[0x12], alpha[0x13], alpha[0x12] ) ); \
|
||||
sA = _mm256_xor_si256( sA, _mm256_set_epi32( \
|
||||
alpha[0x15], alpha[0x14], alpha[0x15], alpha[0x14], \
|
||||
alpha[0x15], alpha[0x14], alpha[0x15], alpha[0x14] ) ); \
|
||||
sB = _mm256_xor_si256( sB, _mm256_set_epi32( \
|
||||
alpha[0x17], alpha[0x16], alpha[0x17], alpha[0x16], \
|
||||
alpha[0x17], alpha[0x16], alpha[0x17], alpha[0x16] ) ); \
|
||||
sC = _mm256_xor_si256( sC, _mm256_set_epi32( \
|
||||
alpha[0x19], alpha[0x18], alpha[0x19], alpha[0x18], \
|
||||
alpha[0x19], alpha[0x18], alpha[0x19], alpha[0x18] ) ); \
|
||||
sD = _mm256_xor_si256( sD, _mm256_set_epi32( \
|
||||
alpha[0x1B], alpha[0x1A], alpha[0x1B], alpha[0x1A], \
|
||||
alpha[0x1B], alpha[0x1A], alpha[0x1B], alpha[0x1A] ) ); \
|
||||
sE = _mm256_xor_si256( sE, _mm256_set_epi32( \
|
||||
alpha[0x1D], alpha[0x1C], alpha[0x1D], alpha[0x1C], \
|
||||
alpha[0x1D], alpha[0x1C], alpha[0x1D], alpha[0x1C] ) ); \
|
||||
sF = _mm256_xor_si256( sF, _mm256_set_epi32( \
|
||||
alpha[0x1F], alpha[0x1E], alpha[0x1F], alpha[0x1E], \
|
||||
alpha[0x1F], alpha[0x1E], alpha[0x1F], alpha[0x1E] ) ); \
|
||||
\
|
||||
SBOX( s0, s4, s8, sC ); \
|
||||
SBOX( s1, s5, s9, sD ); \
|
||||
SBOX( s2, s6, sA, sE ); \
|
||||
SBOX( s3, s7, sB, sF ); \
|
||||
\
|
||||
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s4, 4 ), \
|
||||
_mm256_bslli_epi128( s5, 4 ), 0xAA ); \
|
||||
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sD, 4 ), \
|
||||
_mm256_bslli_epi128( sE, 4 ), 0xAA ); \
|
||||
L( s0, t1, s9, t3 ); \
|
||||
s4 = _mm256_blend_epi32( s4, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
|
||||
s5 = _mm256_blend_epi32( s5, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
|
||||
sD = _mm256_blend_epi32( sD, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
|
||||
sE = _mm256_blend_epi32( sE, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
|
||||
\
|
||||
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s5, 4 ), \
|
||||
_mm256_bslli_epi128( s6, 4 ), 0xAA ); \
|
||||
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sE, 4 ), \
|
||||
_mm256_bslli_epi128( sF, 4 ), 0xAA ); \
|
||||
L( s1, t1, sA, t3 ); \
|
||||
s5 = _mm256_blend_epi32( s5, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
|
||||
s6 = _mm256_blend_epi32( s6, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
|
||||
sE = _mm256_blend_epi32( sE, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
|
||||
sF = _mm256_blend_epi32( sF, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
|
||||
\
|
||||
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s6, 4 ), \
|
||||
_mm256_bslli_epi128( s7, 4 ), 0xAA ); \
|
||||
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sF, 4 ), \
|
||||
_mm256_bslli_epi128( sC, 4 ), 0xAA ); \
|
||||
L( s2, t1, sB, t3 ); \
|
||||
s6 = _mm256_blend_epi32( s6, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
|
||||
s7 = _mm256_blend_epi32( s7, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
|
||||
sF = _mm256_blend_epi32( sF, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
|
||||
sC = _mm256_blend_epi32( sC, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
|
||||
\
|
||||
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s7, 4 ), \
|
||||
_mm256_bslli_epi128( s4, 4 ), 0xAA ); \
|
||||
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( sC, 4 ), \
|
||||
_mm256_bslli_epi128( sD, 4 ), 0xAA ); \
|
||||
L( s3, t1, s8, t3 ); \
|
||||
s7 = _mm256_blend_epi32( s7, _mm256_bslli_epi128( t1, 4 ), 0xAA );\
|
||||
s4 = _mm256_blend_epi32( s4, _mm256_bsrli_epi128( t1, 4 ), 0x55 );\
|
||||
sC = _mm256_blend_epi32( sC, _mm256_bslli_epi128( t3, 4 ), 0xAA );\
|
||||
sD = _mm256_blend_epi32( sD, _mm256_bsrli_epi128( t3, 4 ), 0x55 );\
|
||||
\
|
||||
t0 = _mm256_blend_epi32( s0, _mm256_bslli_epi128( s8, 4 ), 0xAA ); \
|
||||
t1 = _mm256_blend_epi32( s1, s9, 0xAA ); \
|
||||
t2 = _mm256_blend_epi32( _mm256_bsrli_epi128( s2, 4 ), sA, 0xAA ); \
|
||||
t3 = _mm256_blend_epi32( _mm256_bsrli_epi128( s3, 4 ), \
|
||||
_mm256_bslli_epi128( sB, 4 ), 0xAA ); \
|
||||
L( t0, t1, t2, t3 ); \
|
||||
s0 = _mm256_blend_epi32( s0, t0, 0x55 ); \
|
||||
s8 = _mm256_blend_epi32( s8, _mm256_bsrli_epi128( t0, 4 ), 0x55 ); \
|
||||
s1 = _mm256_blend_epi32( s1, t1, 0x55 ); \
|
||||
s9 = _mm256_blend_epi32( s9, t1, 0xAA ); \
|
||||
s2 = _mm256_blend_epi32( s2, _mm256_bslli_epi128( t2, 4 ), 0xAA ); \
|
||||
sA = _mm256_blend_epi32( sA, t2, 0xAA ); \
|
||||
s3 = _mm256_blend_epi32( s3, _mm256_bslli_epi128( t3, 4 ), 0xAA ); \
|
||||
sB = _mm256_blend_epi32( sB, _mm256_bsrli_epi128( t3, 4 ), 0x55 ); \
|
||||
\
|
||||
t0 = _mm256_blend_epi32( _mm256_bsrli_epi128( s4, 4 ), sC, 0xAA ); \
|
||||
t1 = _mm256_blend_epi32( _mm256_bsrli_epi128( s5, 4 ), \
|
||||
_mm256_bslli_epi128( sD, 4 ), 0xAA ); \
|
||||
t2 = _mm256_blend_epi32( s6, _mm256_bslli_epi128( sE, 4 ), 0xAA ); \
|
||||
t3 = _mm256_blend_epi32( s7, sF, 0xAA ); \
|
||||
L( t0, t1, t2, t3 ); \
|
||||
s4 = _mm256_blend_epi32( s4, _mm256_bslli_epi128( t0, 4 ), 0xAA ); \
|
||||
sC = _mm256_blend_epi32( sC, t0, 0xAA ); \
|
||||
s5 = _mm256_blend_epi32( s5, _mm256_bslli_epi128( t1, 4 ), 0xAA ); \
|
||||
sD = _mm256_blend_epi32( sD, _mm256_bsrli_epi128( t1, 4 ), 0x55 ); \
|
||||
s6 = _mm256_blend_epi32( s6, t2, 0x55 ); \
|
||||
sE = _mm256_blend_epi32( sE, _mm256_bsrli_epi128( t2, 4 ), 0x55 ); \
|
||||
s7 = _mm256_blend_epi32( s7, t3, 0x55 ); \
|
||||
sF = _mm256_blend_epi32( sF, t3, 0xAA ); \
|
||||
} while (0)
|
||||
|
||||
#define P_BIG \
|
||||
do { \
|
||||
ROUND_BIG(0, alpha_n); \
|
||||
ROUND_BIG(1, alpha_n); \
|
||||
ROUND_BIG(2, alpha_n); \
|
||||
ROUND_BIG(3, alpha_n); \
|
||||
ROUND_BIG(4, alpha_n); \
|
||||
ROUND_BIG(5, alpha_n); \
|
||||
} while (0)
|
||||
|
||||
#define PF_BIG \
|
||||
do { \
|
||||
ROUND_BIG( 0, alpha_f); \
|
||||
ROUND_BIG( 1, alpha_f); \
|
||||
ROUND_BIG( 2, alpha_f); \
|
||||
ROUND_BIG( 3, alpha_f); \
|
||||
ROUND_BIG( 4, alpha_f); \
|
||||
ROUND_BIG( 5, alpha_f); \
|
||||
ROUND_BIG( 6, alpha_f); \
|
||||
ROUND_BIG( 7, alpha_f); \
|
||||
ROUND_BIG( 8, alpha_f); \
|
||||
ROUND_BIG( 9, alpha_f); \
|
||||
ROUND_BIG(10, alpha_f); \
|
||||
ROUND_BIG(11, alpha_f); \
|
||||
} while (0)
|
||||
|
||||
#define T_BIG \
|
||||
do { /* order is important */ \
|
||||
c7 = sc->h[ 0x7 ] = _mm256_xor_si256( sc->h[ 0x7 ], sB ); \
|
||||
c6 = sc->h[ 0x6 ] = _mm256_xor_si256( sc->h[ 0x6 ], sA ); \
|
||||
c5 = sc->h[ 0x5 ] = _mm256_xor_si256( sc->h[ 0x5 ], s9 ); \
|
||||
c4 = sc->h[ 0x4 ] = _mm256_xor_si256( sc->h[ 0x4 ], s8 ); \
|
||||
c3 = sc->h[ 0x3 ] = _mm256_xor_si256( sc->h[ 0x3 ], s3 ); \
|
||||
c2 = sc->h[ 0x2 ] = _mm256_xor_si256( sc->h[ 0x2 ], s2 ); \
|
||||
c1 = sc->h[ 0x1 ] = _mm256_xor_si256( sc->h[ 0x1 ], s1 ); \
|
||||
c0 = sc->h[ 0x0 ] = _mm256_xor_si256( sc->h[ 0x0 ], s0 ); \
|
||||
} while (0)
|
||||
|
||||
void hamsi_big( hamsi_4way_big_context *sc, __m256i *buf, size_t num )
|
||||
{
|
||||
DECL_STATE_BIG
|
||||
sph_u32 tmp;
|
||||
|
||||
tmp = SPH_T32( (sph_u32)num << 6 );
|
||||
sc->count_low = SPH_T32( sc->count_low + tmp );
|
||||
sc->count_high += (sph_u32)( (num >> 13) >> 13 );
|
||||
if ( sc->count_low < tmp )
|
||||
sc->count_high++;
|
||||
|
||||
READ_STATE_BIG( sc );
|
||||
while ( num-- > 0 )
|
||||
{
|
||||
__m256i m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
|
||||
INPUT_BIG;
|
||||
P_BIG;
|
||||
T_BIG;
|
||||
buf++;
|
||||
}
|
||||
WRITE_STATE_BIG( sc );
|
||||
}
|
||||
|
||||
void hamsi_big_final( hamsi_4way_big_context *sc, __m256i *buf )
|
||||
{
|
||||
__m256i m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
DECL_STATE_BIG
|
||||
READ_STATE_BIG( sc );
|
||||
INPUT_BIG;
|
||||
PF_BIG;
|
||||
T_BIG;
|
||||
WRITE_STATE_BIG( sc );
|
||||
}
|
||||
|
||||
void hamsi512_4way_init( hamsi_4way_big_context *sc )
|
||||
{
|
||||
sc->partial_len = 0;
|
||||
sph_u32 lo, hi;
|
||||
sc->count_high = sc->count_low = 0;
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
{
|
||||
lo = 2*i;
|
||||
hi = 2*i + 1;
|
||||
sc->h[i] = _mm256_set_epi32( IV512[hi], IV512[lo], IV512[hi], IV512[lo],
|
||||
IV512[hi], IV512[lo], IV512[hi], IV512[lo] );
|
||||
}
|
||||
}
|
||||
|
||||
void hamsi512_4way( hamsi_4way_big_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
|
||||
// It looks like the only way to get in here is if core was previously called
|
||||
// with a very small len
|
||||
// That's not likely even with 80 byte input so deprecate partial len
|
||||
/*
|
||||
if ( sc->partial_len != 0 )
|
||||
{
|
||||
size_t mlen;
|
||||
|
||||
mlen = 8 - sc->partial_len;
|
||||
if ( len < mlen )
|
||||
{
|
||||
memcpy_256( sc->partial + (sc->partial_len >> 3), data, len>>3 );
|
||||
sc->partial_len += len;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy_256( sc->partial + (sc->partial_len >> 3), data, mlen>>3 );
|
||||
len -= mlen;
|
||||
vdata += mlen>>3;
|
||||
hamsi_big( sc, sc->partial, 1 );
|
||||
sc->partial_len = 0;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
hamsi_big( sc, vdata, len>>3 );
|
||||
vdata += ( (len& ~(size_t)7) >> 3 );
|
||||
len &= (size_t)7;
|
||||
memcpy_256( sc->buf, vdata, len>>3 );
|
||||
sc->partial_len = len;
|
||||
}
|
||||
|
||||
void hamsi512_4way_close( hamsi_4way_big_context *sc, void *dst )
|
||||
{
|
||||
__m256i *out = (__m256i*)dst;
|
||||
__m256i pad[1];
|
||||
size_t u;
|
||||
int ch, cl;
|
||||
|
||||
sph_enc32be( &ch, sc->count_high );
|
||||
sph_enc32be( &cl, sc->count_low + ( sc->partial_len << 3 ) );
|
||||
pad[0] = _mm256_set_epi32( cl, ch, cl, ch, cl, ch, cl, ch );
|
||||
sc->buf[0] = _mm256_set_epi32( 0UL, 0x80UL, 0UL, 0x80UL,
|
||||
0UL, 0x80UL, 0UL, 0x80UL );
|
||||
hamsi_big( sc, sc->buf, 1 );
|
||||
hamsi_big_final( sc, pad );
|
||||
for ( u = 0; u < 8; u ++ )
|
||||
out[u] = mm256_bswap_32( sc->h[u] );
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
72
algo/hamsi/hamsi-hash-4way.h
Normal file
72
algo/hamsi/hamsi-hash-4way.h
Normal file
@@ -0,0 +1,72 @@
|
||||
/* $Id: sph_hamsi.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* Hamsi interface. This code implements Hamsi with the recommended
|
||||
* parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_hamsi.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef HAMSI_4WAY_H__
|
||||
#define HAMSI_4WAY_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
|
||||
#if defined (__AVX__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#define SPH_SIZE_hamsi512 512
|
||||
|
||||
// Partial is only scalar but needs pointer ref for hamsi-helper
|
||||
// deprecate partial_len
|
||||
typedef struct {
|
||||
__m256i h[8];
|
||||
__m256i buf[1];
|
||||
size_t partial_len;
|
||||
sph_u32 count_high, count_low;
|
||||
} hamsi_4way_big_context;
|
||||
|
||||
typedef hamsi_4way_big_context hamsi512_4way_context;
|
||||
|
||||
void hamsi512_4way_init( hamsi512_4way_context *sc );
|
||||
void hamsi512_4way( hamsi512_4way_context *sc, const void *data, size_t len );
|
||||
void hamsi512_4way_close( hamsi512_4way_context *sc, void *dst );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
115
algo/haval/haval-4way-helper.c
Normal file
115
algo/haval/haval-4way-helper.c
Normal file
@@ -0,0 +1,115 @@
|
||||
/* $Id: haval_helper.c 218 2010-06-08 17:06:34Z tp $ */
|
||||
/*
|
||||
* Helper code, included (three times !) by HAVAL implementation.
|
||||
*
|
||||
* TODO: try to merge this with md_helper.c.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#undef SPH_XCAT
|
||||
#define SPH_XCAT(a, b) SPH_XCAT_(a, b)
|
||||
#undef SPH_XCAT_
|
||||
#define SPH_XCAT_(a, b) a ## b
|
||||
|
||||
static void
|
||||
SPH_XCAT(SPH_XCAT(haval, PASSES), _4way)
|
||||
( haval_4way_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m128i *vdata = (__m128i*)data;
|
||||
unsigned current;
|
||||
|
||||
current = (unsigned)sc->count_low & 127U;
|
||||
while ( len > 0 )
|
||||
{
|
||||
unsigned clen;
|
||||
sph_u32 clow, clow2;
|
||||
|
||||
clen = 128U - current;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_128( sc->buf + (current>>2), vdata, clen>>2 );
|
||||
vdata += clen>>2;
|
||||
current += clen;
|
||||
len -= clen;
|
||||
if ( current == 128U )
|
||||
{
|
||||
DSTATE;
|
||||
IN_PREPARE(sc->buf);
|
||||
RSTATE;
|
||||
SPH_XCAT(CORE, PASSES)(INW);
|
||||
WSTATE;
|
||||
current = 0;
|
||||
}
|
||||
clow = sc->count_low;
|
||||
clow2 = SPH_T32(clow + clen);
|
||||
sc->count_low = clow2;
|
||||
if ( clow2 < clow )
|
||||
sc->count_high ++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
SPH_XCAT(SPH_XCAT(haval, PASSES), _4way_close)( haval_4way_context *sc,
|
||||
void *dst)
|
||||
{
|
||||
unsigned current;
|
||||
DSTATE;
|
||||
|
||||
current = (unsigned)sc->count_low & 127UL;
|
||||
|
||||
sc->buf[ current>>2 ] = m128_one_32;
|
||||
current += 4;
|
||||
RSTATE;
|
||||
if ( current > 116UL )
|
||||
{
|
||||
memset_zero_128( sc->buf + ( current>>2 ), (128UL-current) >> 2 );
|
||||
do
|
||||
{
|
||||
IN_PREPARE(sc->buf);
|
||||
SPH_XCAT(CORE, PASSES)(INW);
|
||||
} while (0);
|
||||
current = 0;
|
||||
}
|
||||
|
||||
uint32_t t1, t2;
|
||||
memset_zero_128( sc->buf + ( current>>2 ), (116UL-current) >> 2 );
|
||||
t1 = 0x01 | (PASSES << 3);
|
||||
t2 = sc->olen << 3;
|
||||
sc->buf[ 116>>2 ] = _mm_set1_epi32( ( t1 << 16 ) | ( t2 << 24 ) );
|
||||
sc->buf[ 120>>2 ] = _mm_set1_epi32( sc->count_low << 3 );
|
||||
sc->buf[ 124>>2 ] = _mm_set1_epi32( (sc->count_high << 3)
|
||||
| (sc->count_low >> 29) );
|
||||
do
|
||||
{
|
||||
IN_PREPARE(sc->buf);
|
||||
SPH_XCAT(CORE, PASSES)(INW);
|
||||
} while (0);
|
||||
WSTATE;
|
||||
haval_4way_out( sc, dst );
|
||||
}
|
522
algo/haval/haval-hash-4way.c
Normal file
522
algo/haval/haval-hash-4way.c
Normal file
@@ -0,0 +1,522 @@
|
||||
/* $Id: haval.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* HAVAL implementation.
|
||||
*
|
||||
* The HAVAL reference paper is of questionable clarity with regards to
|
||||
* some details such as endianness of bits within a byte, bytes within
|
||||
* a 32-bit word, or the actual ordering of words within a stream of
|
||||
* words. This implementation has been made compatible with the reference
|
||||
* implementation available on: http://labs.calyptix.com/haval.php
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include "haval-hash-4way.h"
|
||||
|
||||
#if defined (__AVX__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
//#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAVAL
|
||||
#define SPH_SMALL_FOOTPRINT_HAVAL 1
|
||||
//#endif
|
||||
|
||||
#define F1(x6, x5, x4, x3, x2, x1, x0) \
|
||||
_mm_xor_si128( x0, \
|
||||
_mm_xor_si128( _mm_and_si128(_mm_xor_si128( x0, x4 ), x1 ), \
|
||||
_mm_xor_si128( _mm_and_si128( x2, x5 ), \
|
||||
_mm_and_si128( x3, x6 ) ) ) ) \
|
||||
|
||||
#define F2(x6, x5, x4, x3, x2, x1, x0) \
|
||||
_mm_xor_si128( \
|
||||
_mm_and_si128( x2, \
|
||||
_mm_xor_si128( _mm_andnot_si128( x3, x1 ), \
|
||||
_mm_xor_si128( _mm_and_si128( x4, x5 ), \
|
||||
_mm_xor_si128( x6, x0 ) ) ) ), \
|
||||
_mm_xor_si128( \
|
||||
_mm_and_si128( x4, _mm_xor_si128( x1, x5 ) ), \
|
||||
_mm_xor_si128( _mm_and_si128( x3, x5 ), x0 ) ) ) \
|
||||
|
||||
#define F3(x6, x5, x4, x3, x2, x1, x0) \
|
||||
_mm_xor_si128( \
|
||||
_mm_and_si128( x3, \
|
||||
_mm_xor_si128( _mm_and_si128( x1, x2 ), \
|
||||
_mm_xor_si128( x6, x0 ) ) ), \
|
||||
_mm_xor_si128( _mm_xor_si128(_mm_and_si128( x1, x4 ), \
|
||||
_mm_and_si128( x2, x5 ) ), x0 ) )
|
||||
|
||||
#define F4(x6, x5, x4, x3, x2, x1, x0) \
|
||||
_mm_xor_si128( \
|
||||
_mm_xor_si128( \
|
||||
_mm_and_si128( x3, \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_and_si128( x1, x2 ), \
|
||||
_mm_or_si128( x4, x6 ) ), x5 ) ), \
|
||||
_mm_and_si128( x4, \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_and_si128( mm_not(x2), x5 ), \
|
||||
_mm_xor_si128( x1, x6 ) ), x0 ) ) ), \
|
||||
_mm_xor_si128( _mm_and_si128( x2, x6 ), x0 ) )
|
||||
|
||||
|
||||
#define F5(x6, x5, x4, x3, x2, x1, x0) \
|
||||
_mm_xor_si128( \
|
||||
_mm_and_si128( x0, \
|
||||
mm_not( _mm_xor_si128( \
|
||||
_mm_and_si128( _mm_and_si128( x1, x2 ), x3 ), x5 ) ) ), \
|
||||
_mm_xor_si128( _mm_xor_si128( _mm_and_si128( x1, x4 ), \
|
||||
_mm_and_si128( x2, x5 ) ), \
|
||||
_mm_and_si128( x3, x6 ) ) )
|
||||
|
||||
/*
|
||||
* The macros below integrate the phi() permutations, depending on the
|
||||
* pass and the total number of passes.
|
||||
*/
|
||||
|
||||
#define FP3_1(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F1(x1, x0, x3, x5, x6, x2, x4)
|
||||
#define FP3_2(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F2(x4, x2, x1, x0, x5, x3, x6)
|
||||
#define FP3_3(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F3(x6, x1, x2, x3, x4, x5, x0)
|
||||
|
||||
#define FP4_1(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F1(x2, x6, x1, x4, x5, x3, x0)
|
||||
#define FP4_2(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F2(x3, x5, x2, x0, x1, x6, x4)
|
||||
#define FP4_3(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F3(x1, x4, x3, x6, x0, x2, x5)
|
||||
#define FP4_4(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F4(x6, x4, x0, x5, x2, x1, x3)
|
||||
|
||||
#define FP5_1(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F1(x3, x4, x1, x0, x5, x2, x6)
|
||||
#define FP5_2(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F2(x6, x2, x1, x0, x3, x4, x5)
|
||||
#define FP5_3(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F3(x2, x6, x0, x4, x3, x1, x5)
|
||||
#define FP5_4(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F4(x1, x5, x3, x2, x0, x4, x6)
|
||||
#define FP5_5(x6, x5, x4, x3, x2, x1, x0) \
|
||||
F5(x2, x5, x0, x6, x4, x3, x1)
|
||||
|
||||
/*
|
||||
* One step, for "n" passes, pass number "p" (1 <= p <= n), using
|
||||
* input word number "w" and step constant "c".
|
||||
*/
|
||||
#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c) \
|
||||
do { \
|
||||
__m128i t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); \
|
||||
x7 = _mm_add_epi32( _mm_add_epi32( mm_rotr_32( t, 7 ), \
|
||||
mm_rotr_32( x7, 11 ) ), \
|
||||
_mm_add_epi32( w, _mm_set1_epi32( c ) ) ); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* PASSy(n, in) computes pass number "y", for a total of "n", using the
|
||||
* one-argument macro "in" to access input words. Current state is assumed
|
||||
* to be held in variables "s0" to "s7".
|
||||
*/
|
||||
|
||||
//#if SPH_SMALL_FOOTPRINT_HAVAL
|
||||
|
||||
#define PASS1(n, in) do { \
|
||||
unsigned pass_count; \
|
||||
for (pass_count = 0; pass_count < 32; pass_count += 8) { \
|
||||
STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, \
|
||||
in(pass_count + 0), SPH_C32(0x00000000)); \
|
||||
STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, \
|
||||
in(pass_count + 1), SPH_C32(0x00000000)); \
|
||||
STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, \
|
||||
in(pass_count + 2), SPH_C32(0x00000000)); \
|
||||
STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, \
|
||||
in(pass_count + 3), SPH_C32(0x00000000)); \
|
||||
STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, \
|
||||
in(pass_count + 4), SPH_C32(0x00000000)); \
|
||||
STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, \
|
||||
in(pass_count + 5), SPH_C32(0x00000000)); \
|
||||
STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, \
|
||||
in(pass_count + 6), SPH_C32(0x00000000)); \
|
||||
STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, \
|
||||
in(pass_count + 7), SPH_C32(0x00000000)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define PASSG(p, n, in) do { \
|
||||
unsigned pass_count; \
|
||||
for (pass_count = 0; pass_count < 32; pass_count += 8) { \
|
||||
STEP(n, p, s7, s6, s5, s4, s3, s2, s1, s0, \
|
||||
in(MP ## p[pass_count + 0]), \
|
||||
RK ## p[pass_count + 0]); \
|
||||
STEP(n, p, s6, s5, s4, s3, s2, s1, s0, s7, \
|
||||
in(MP ## p[pass_count + 1]), \
|
||||
RK ## p[pass_count + 1]); \
|
||||
STEP(n, p, s5, s4, s3, s2, s1, s0, s7, s6, \
|
||||
in(MP ## p[pass_count + 2]), \
|
||||
RK ## p[pass_count + 2]); \
|
||||
STEP(n, p, s4, s3, s2, s1, s0, s7, s6, s5, \
|
||||
in(MP ## p[pass_count + 3]), \
|
||||
RK ## p[pass_count + 3]); \
|
||||
STEP(n, p, s3, s2, s1, s0, s7, s6, s5, s4, \
|
||||
in(MP ## p[pass_count + 4]), \
|
||||
RK ## p[pass_count + 4]); \
|
||||
STEP(n, p, s2, s1, s0, s7, s6, s5, s4, s3, \
|
||||
in(MP ## p[pass_count + 5]), \
|
||||
RK ## p[pass_count + 5]); \
|
||||
STEP(n, p, s1, s0, s7, s6, s5, s4, s3, s2, \
|
||||
in(MP ## p[pass_count + 6]), \
|
||||
RK ## p[pass_count + 6]); \
|
||||
STEP(n, p, s0, s7, s6, s5, s4, s3, s2, s1, \
|
||||
in(MP ## p[pass_count + 7]), \
|
||||
RK ## p[pass_count + 7]); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define PASS2(n, in) PASSG(2, n, in)
|
||||
#define PASS3(n, in) PASSG(3, n, in)
|
||||
#define PASS4(n, in) PASSG(4, n, in)
|
||||
#define PASS5(n, in) PASSG(5, n, in)
|
||||
|
||||
static const unsigned MP2[32] = {
|
||||
5, 14, 26, 18, 11, 28, 7, 16,
|
||||
0, 23, 20, 22, 1, 10, 4, 8,
|
||||
30, 3, 21, 9, 17, 24, 29, 6,
|
||||
19, 12, 15, 13, 2, 25, 31, 27
|
||||
};
|
||||
|
||||
static const unsigned MP3[32] = {
|
||||
19, 9, 4, 20, 28, 17, 8, 22,
|
||||
29, 14, 25, 12, 24, 30, 16, 26,
|
||||
31, 15, 7, 3, 1, 0, 18, 27,
|
||||
13, 6, 21, 10, 23, 11, 5, 2
|
||||
};
|
||||
|
||||
static const unsigned MP4[32] = {
|
||||
24, 4, 0, 14, 2, 7, 28, 23,
|
||||
26, 6, 30, 20, 18, 25, 19, 3,
|
||||
22, 11, 31, 21, 8, 27, 12, 9,
|
||||
1, 29, 5, 15, 17, 10, 16, 13
|
||||
};
|
||||
|
||||
static const unsigned MP5[32] = {
|
||||
27, 3, 21, 26, 17, 11, 20, 29,
|
||||
19, 0, 12, 7, 13, 8, 31, 10,
|
||||
5, 9, 14, 30, 18, 6, 28, 24,
|
||||
2, 23, 16, 22, 4, 1, 25, 15
|
||||
};
|
||||
|
||||
static const sph_u32 RK2[32] = {
|
||||
SPH_C32(0x452821E6), SPH_C32(0x38D01377),
|
||||
SPH_C32(0xBE5466CF), SPH_C32(0x34E90C6C),
|
||||
SPH_C32(0xC0AC29B7), SPH_C32(0xC97C50DD),
|
||||
SPH_C32(0x3F84D5B5), SPH_C32(0xB5470917),
|
||||
SPH_C32(0x9216D5D9), SPH_C32(0x8979FB1B),
|
||||
SPH_C32(0xD1310BA6), SPH_C32(0x98DFB5AC),
|
||||
SPH_C32(0x2FFD72DB), SPH_C32(0xD01ADFB7),
|
||||
SPH_C32(0xB8E1AFED), SPH_C32(0x6A267E96),
|
||||
SPH_C32(0xBA7C9045), SPH_C32(0xF12C7F99),
|
||||
SPH_C32(0x24A19947), SPH_C32(0xB3916CF7),
|
||||
SPH_C32(0x0801F2E2), SPH_C32(0x858EFC16),
|
||||
SPH_C32(0x636920D8), SPH_C32(0x71574E69),
|
||||
SPH_C32(0xA458FEA3), SPH_C32(0xF4933D7E),
|
||||
SPH_C32(0x0D95748F), SPH_C32(0x728EB658),
|
||||
SPH_C32(0x718BCD58), SPH_C32(0x82154AEE),
|
||||
SPH_C32(0x7B54A41D), SPH_C32(0xC25A59B5)
|
||||
};
|
||||
|
||||
static const sph_u32 RK3[32] = {
|
||||
SPH_C32(0x9C30D539), SPH_C32(0x2AF26013),
|
||||
SPH_C32(0xC5D1B023), SPH_C32(0x286085F0),
|
||||
SPH_C32(0xCA417918), SPH_C32(0xB8DB38EF),
|
||||
SPH_C32(0x8E79DCB0), SPH_C32(0x603A180E),
|
||||
SPH_C32(0x6C9E0E8B), SPH_C32(0xB01E8A3E),
|
||||
SPH_C32(0xD71577C1), SPH_C32(0xBD314B27),
|
||||
SPH_C32(0x78AF2FDA), SPH_C32(0x55605C60),
|
||||
SPH_C32(0xE65525F3), SPH_C32(0xAA55AB94),
|
||||
SPH_C32(0x57489862), SPH_C32(0x63E81440),
|
||||
SPH_C32(0x55CA396A), SPH_C32(0x2AAB10B6),
|
||||
SPH_C32(0xB4CC5C34), SPH_C32(0x1141E8CE),
|
||||
SPH_C32(0xA15486AF), SPH_C32(0x7C72E993),
|
||||
SPH_C32(0xB3EE1411), SPH_C32(0x636FBC2A),
|
||||
SPH_C32(0x2BA9C55D), SPH_C32(0x741831F6),
|
||||
SPH_C32(0xCE5C3E16), SPH_C32(0x9B87931E),
|
||||
SPH_C32(0xAFD6BA33), SPH_C32(0x6C24CF5C)
|
||||
};
|
||||
|
||||
static const sph_u32 RK4[32] = {
|
||||
SPH_C32(0x7A325381), SPH_C32(0x28958677),
|
||||
SPH_C32(0x3B8F4898), SPH_C32(0x6B4BB9AF),
|
||||
SPH_C32(0xC4BFE81B), SPH_C32(0x66282193),
|
||||
SPH_C32(0x61D809CC), SPH_C32(0xFB21A991),
|
||||
SPH_C32(0x487CAC60), SPH_C32(0x5DEC8032),
|
||||
SPH_C32(0xEF845D5D), SPH_C32(0xE98575B1),
|
||||
SPH_C32(0xDC262302), SPH_C32(0xEB651B88),
|
||||
SPH_C32(0x23893E81), SPH_C32(0xD396ACC5),
|
||||
SPH_C32(0x0F6D6FF3), SPH_C32(0x83F44239),
|
||||
SPH_C32(0x2E0B4482), SPH_C32(0xA4842004),
|
||||
SPH_C32(0x69C8F04A), SPH_C32(0x9E1F9B5E),
|
||||
SPH_C32(0x21C66842), SPH_C32(0xF6E96C9A),
|
||||
SPH_C32(0x670C9C61), SPH_C32(0xABD388F0),
|
||||
SPH_C32(0x6A51A0D2), SPH_C32(0xD8542F68),
|
||||
SPH_C32(0x960FA728), SPH_C32(0xAB5133A3),
|
||||
SPH_C32(0x6EEF0B6C), SPH_C32(0x137A3BE4)
|
||||
};
|
||||
|
||||
static const sph_u32 RK5[32] = {
|
||||
SPH_C32(0xBA3BF050), SPH_C32(0x7EFB2A98),
|
||||
SPH_C32(0xA1F1651D), SPH_C32(0x39AF0176),
|
||||
SPH_C32(0x66CA593E), SPH_C32(0x82430E88),
|
||||
SPH_C32(0x8CEE8619), SPH_C32(0x456F9FB4),
|
||||
SPH_C32(0x7D84A5C3), SPH_C32(0x3B8B5EBE),
|
||||
SPH_C32(0xE06F75D8), SPH_C32(0x85C12073),
|
||||
SPH_C32(0x401A449F), SPH_C32(0x56C16AA6),
|
||||
SPH_C32(0x4ED3AA62), SPH_C32(0x363F7706),
|
||||
SPH_C32(0x1BFEDF72), SPH_C32(0x429B023D),
|
||||
SPH_C32(0x37D0D724), SPH_C32(0xD00A1248),
|
||||
SPH_C32(0xDB0FEAD3), SPH_C32(0x49F1C09B),
|
||||
SPH_C32(0x075372C9), SPH_C32(0x80991B7B),
|
||||
SPH_C32(0x25D479D8), SPH_C32(0xF6E8DEF7),
|
||||
SPH_C32(0xE3FE501A), SPH_C32(0xB6794C3B),
|
||||
SPH_C32(0x976CE0BD), SPH_C32(0x04C006BA),
|
||||
SPH_C32(0xC1A94FB6), SPH_C32(0x409F60C4)
|
||||
};
|
||||
|
||||
#define SAVE_STATE \
|
||||
__m128i u0, u1, u2, u3, u4, u5, u6, u7; \
|
||||
do { \
|
||||
u0 = s0; \
|
||||
u1 = s1; \
|
||||
u2 = s2; \
|
||||
u3 = s3; \
|
||||
u4 = s4; \
|
||||
u5 = s5; \
|
||||
u6 = s6; \
|
||||
u7 = s7; \
|
||||
} while (0)
|
||||
|
||||
#define UPDATE_STATE \
|
||||
do { \
|
||||
s0 = _mm_add_epi32( s0, u0 ); \
|
||||
s1 = _mm_add_epi32( s1, u1 ); \
|
||||
s2 = _mm_add_epi32( s2, u2 ); \
|
||||
s3 = _mm_add_epi32( s3, u3 ); \
|
||||
s4 = _mm_add_epi32( s4, u4 ); \
|
||||
s5 = _mm_add_epi32( s5, u5 ); \
|
||||
s6 = _mm_add_epi32( s6, u6 ); \
|
||||
s7 = _mm_add_epi32( s7, u7 ); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* COREn(in) performs the core HAVAL computation for "n" passes, using
|
||||
* the one-argument macro "in" to access the input words. Running state
|
||||
* is held in variable "s0" to "s7".
|
||||
*/
|
||||
/*
|
||||
#define CORE3(in) do { \
|
||||
SAVE_STATE; \
|
||||
PASS1(3, in); \
|
||||
PASS2(3, in); \
|
||||
PASS3(3, in); \
|
||||
UPDATE_STATE; \
|
||||
} while (0)
|
||||
|
||||
#define CORE4(in) do { \
|
||||
SAVE_STATE; \
|
||||
PASS1(4, in); \
|
||||
PASS2(4, in); \
|
||||
PASS3(4, in); \
|
||||
PASS4(4, in); \
|
||||
UPDATE_STATE; \
|
||||
} while (0)
|
||||
*/
|
||||
#define CORE5(in) do { \
|
||||
SAVE_STATE; \
|
||||
PASS1(5, in); \
|
||||
PASS2(5, in); \
|
||||
PASS3(5, in); \
|
||||
PASS4(5, in); \
|
||||
PASS5(5, in); \
|
||||
UPDATE_STATE; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* DSTATE declares the state variables "s0" to "s7".
|
||||
*/
|
||||
#define DSTATE __m128i s0, s1, s2, s3, s4, s5, s6, s7
|
||||
|
||||
/*
|
||||
* RSTATE fills the state variables from the context "sc".
|
||||
*/
|
||||
#define RSTATE \
|
||||
do { \
|
||||
s0 = sc->s0; \
|
||||
s1 = sc->s1; \
|
||||
s2 = sc->s2; \
|
||||
s3 = sc->s3; \
|
||||
s4 = sc->s4; \
|
||||
s5 = sc->s5; \
|
||||
s6 = sc->s6; \
|
||||
s7 = sc->s7; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* WSTATE updates the context "sc" from the state variables.
|
||||
*/
|
||||
#define WSTATE \
|
||||
do { \
|
||||
sc->s0 = s0; \
|
||||
sc->s1 = s1; \
|
||||
sc->s2 = s2; \
|
||||
sc->s3 = s3; \
|
||||
sc->s4 = s4; \
|
||||
sc->s5 = s5; \
|
||||
sc->s6 = s6; \
|
||||
sc->s7 = s7; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Initialize a context. "olen" is the output length, in 32-bit words
|
||||
* (between 4 and 8, inclusive). "passes" is the number of passes
|
||||
* (3, 4 or 5).
|
||||
*/
|
||||
static void
|
||||
haval_4way_init( haval_4way_context *sc, unsigned olen, unsigned passes )
|
||||
{
|
||||
sc->s0 = _mm_set1_epi32( 0x243F6A88UL );
|
||||
sc->s1 = _mm_set1_epi32( 0x85A308D3UL );
|
||||
sc->s2 = _mm_set1_epi32( 0x13198A2EUL );
|
||||
sc->s3 = _mm_set1_epi32( 0x03707344UL );
|
||||
sc->s4 = _mm_set1_epi32( 0xA4093822UL );
|
||||
sc->s5 = _mm_set1_epi32( 0x299F31D0UL );
|
||||
sc->s6 = _mm_set1_epi32( 0x082EFA98UL );
|
||||
sc->s7 = _mm_set1_epi32( 0xEC4E6C89UL );
|
||||
sc->olen = olen;
|
||||
sc->passes = passes;
|
||||
sc->count_high = 0;
|
||||
sc->count_low = 0;
|
||||
|
||||
}
|
||||
|
||||
#define IN_PREPARE(indata) const __m128i *const load_ptr = (indata)
|
||||
|
||||
#define INW(i) load_ptr[ i ]
|
||||
|
||||
/*
|
||||
* Write out HAVAL output. The output length is tailored to the requested
|
||||
* length.
|
||||
*/
|
||||
static void
|
||||
haval_4way_out( haval_4way_context *sc, void *dst )
|
||||
{
|
||||
__m128i *buf = (__m128i*)dst;
|
||||
DSTATE;
|
||||
RSTATE;
|
||||
|
||||
buf[0] = s0;
|
||||
buf[1] = s1;
|
||||
buf[2] = s2;
|
||||
buf[3] = s3;
|
||||
buf[4] = s4;
|
||||
buf[5] = s5;
|
||||
buf[6] = s6;
|
||||
buf[7] = s7;
|
||||
}
|
||||
|
||||
/*
|
||||
* The main core functions inline the code with the COREx() macros. We
|
||||
* use a helper file, included three times, which avoids code copying.
|
||||
*/
|
||||
/*
|
||||
#undef PASSES
|
||||
#define PASSES 3
|
||||
#include "haval-helper.c"
|
||||
|
||||
#undef PASSES
|
||||
#define PASSES 4
|
||||
#include "haval-helper.c"
|
||||
*/
|
||||
|
||||
#undef PASSES
|
||||
#define PASSES 5
|
||||
#include "haval-4way-helper.c"
|
||||
|
||||
/* ====================================================================== */
|
||||
|
||||
#define API(xxx, y) \
|
||||
void \
|
||||
haval ## xxx ## _ ## y ## _4way_init(void *cc) \
|
||||
{ \
|
||||
haval_4way_init(cc, xxx >> 5, y); \
|
||||
} \
|
||||
\
|
||||
void \
|
||||
haval ## xxx ## _ ## y ## _4way (void *cc, const void *data, size_t len) \
|
||||
{ \
|
||||
haval ## y ## _4way(cc, data, len); \
|
||||
} \
|
||||
\
|
||||
void \
|
||||
haval ## xxx ## _ ## y ## _4way_close(void *cc, void *dst) \
|
||||
{ \
|
||||
haval ## y ## _4way_close(cc, dst); \
|
||||
} \
|
||||
|
||||
API(256, 5)
|
||||
|
||||
#define RVAL \
|
||||
do { \
|
||||
s0 = val[0]; \
|
||||
s1 = val[1]; \
|
||||
s2 = val[2]; \
|
||||
s3 = val[3]; \
|
||||
s4 = val[4]; \
|
||||
s5 = val[5]; \
|
||||
s6 = val[6]; \
|
||||
s7 = val[7]; \
|
||||
} while (0)
|
||||
|
||||
#define WVAL \
|
||||
do { \
|
||||
val[0] = s0; \
|
||||
val[1] = s1; \
|
||||
val[2] = s2; \
|
||||
val[3] = s3; \
|
||||
val[4] = s4; \
|
||||
val[5] = s5; \
|
||||
val[6] = s6; \
|
||||
val[7] = s7; \
|
||||
} while (0)
|
||||
|
||||
#define INMSG(i) msg[i]
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
95
algo/haval/haval-hash-4way.h
Normal file
95
algo/haval/haval-hash-4way.h
Normal file
@@ -0,0 +1,95 @@
|
||||
/* $Id: sph_haval.h 218 2010-06-08 17:06:34Z tp $ */
|
||||
/**
|
||||
* HAVAL interface.
|
||||
*
|
||||
* HAVAL is actually a family of 15 hash functions, depending on whether
|
||||
* the internal computation uses 3, 4 or 5 passes, and on the output
|
||||
* length, which is 128, 160, 192, 224 or 256 bits. This implementation
|
||||
* provides interface functions for all 15, which internally map to
|
||||
* three cores (depending on the number of passes). Note that output
|
||||
* lengths other than 256 bits are not obtained by a simple truncation
|
||||
* of a longer result; the requested length is encoded within the
|
||||
* padding data.
|
||||
*
|
||||
* HAVAL was published in: Yuliang Zheng, Josef Pieprzyk and Jennifer
|
||||
* Seberry: "HAVAL -- a one-way hashing algorithm with variable length
|
||||
* of output", Advances in Cryptology -- AUSCRYPT'92, Lecture Notes in
|
||||
* Computer Science, Vol.718, pp.83-104, Springer-Verlag, 1993.
|
||||
*
|
||||
* This paper, and a reference implementation, are available on the
|
||||
* Calyptix web site: http://labs.calyptix.com/haval.php
|
||||
*
|
||||
* The HAVAL reference paper is quite unclear on the data encoding
|
||||
* details, i.e. endianness (both byte order within a 32-bit word, and
|
||||
* word order within a message block). This implementation has been
|
||||
* made compatible with the reference implementation referenced above.
|
||||
*
|
||||
* @warning A collision for HAVAL-128/3 (HAVAL with three passes and
|
||||
* 128-bit output) has been published; this function is thus considered
|
||||
* as cryptographically broken. The status for other variants is unclear;
|
||||
* use only with care.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_haval.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef HAVAL_HASH_4WAY_H__
|
||||
#define HAVAL_HASH_4WAY_H__
|
||||
|
||||
#if defined(__AVX__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
#define SPH_SIZE_haval256_5 256
|
||||
|
||||
typedef struct {
|
||||
__m128i buf[32];
|
||||
__m128i s0, s1, s2, s3, s4, s5, s6, s7;
|
||||
unsigned olen, passes;
|
||||
sph_u32 count_high, count_low;
|
||||
} haval_4way_context;
|
||||
|
||||
typedef haval_4way_context haval256_5_4way_context;
|
||||
|
||||
void haval256_5_4way_init( void *cc );
|
||||
|
||||
void haval256_5_4way( void *cc, const void *data, size_t len );
|
||||
|
||||
void haval256_5_4way_close( void *cc, void *dst );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
@@ -1,4 +1,3 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <stdio.h>
|
||||
@@ -16,7 +15,7 @@
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
#include "algo/hamsi/sph_hamsi.h"
|
||||
#include "algo/luffa/sse2/luffa_for_sse2.h"
|
||||
#include "algo/luffa/luffa_for_sse2.h"
|
||||
#include "algo/skein/sse2/skein.c"
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
|
@@ -2,7 +2,6 @@
|
||||
#include <openssl/sha.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "sph_hefty1.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
|
@@ -1,10 +1,7 @@
|
||||
#include <memory.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "miner.h"
|
||||
//#include "algo-gate-api.h"
|
||||
#include "hodl-gate.h"
|
||||
//#include "hodl.h"
|
||||
#include "hodl-wolf.h"
|
||||
|
||||
#define HODL_NSTARTLOC_INDEX 20
|
||||
@@ -97,21 +94,20 @@ bool hodl_do_this_thread( int thr_id )
|
||||
int hodl_scanhash( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
|
||||
return false;
|
||||
// GetPsuedoRandomData( hodl_scratchbuf, work->data, thr_id );
|
||||
// pthread_barrier_wait( &hodl_barrier );
|
||||
// return scanhash_hodl( thr_id, work, max_nonce, hashes_done );
|
||||
#else
|
||||
GenRandomGarbage( hodl_scratchbuf, work->data, thr_id );
|
||||
#ifndef NO_AES_NI
|
||||
GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, thr_id );
|
||||
pthread_barrier_wait( &hodl_barrier );
|
||||
return scanhash_hodl_wolf( thr_id, work, max_nonce, hashes_done );
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
bool register_hodl_algo( algo_gate_t* gate )
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
|
||||
return false;
|
||||
#endif
|
||||
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&hodl_scanhash;
|
||||
|
@@ -150,6 +150,9 @@ int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
int searchNumber = COMPARE_SIZE / opt_n_threads;
|
||||
int startLoc = threadNumber * searchNumber;
|
||||
|
||||
if ( opt_debug )
|
||||
applog( LOG_DEBUG,"Hash target= %08lx", ptarget[7] );
|
||||
|
||||
for(int32_t k = startLoc; k < startLoc + searchNumber && !work_restart[threadNumber].restart; k++)
|
||||
{
|
||||
// copy data to first l2 cache
|
||||
|
@@ -4,6 +4,11 @@
|
||||
//Dependencies
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#include <sys/endian.h>
|
||||
#endif
|
||||
|
||||
#include "tmmintrin.h"
|
||||
#include "smmintrin.h"
|
||||
|
||||
|
@@ -3,6 +3,11 @@
|
||||
//Dependencies
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#include <sys/endian.h>
|
||||
#endif
|
||||
|
||||
#include "tmmintrin.h"
|
||||
#include "smmintrin.h"
|
||||
#include "immintrin.h"
|
||||
|
609
algo/jh/jh-hash-4way.c
Normal file
609
algo/jh/jh-hash-4way.c
Normal file
@@ -0,0 +1,609 @@
|
||||
/* $Id: jh.c 255 2011-06-07 19:50:20Z tp $ */
|
||||
/*
|
||||
* JH implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "jh-hash-4way.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_JH
|
||||
#define SPH_SMALL_FOOTPRINT_JH 1
|
||||
#endif
|
||||
|
||||
#if !defined SPH_JH_64 && SPH_64_TRUE
|
||||
#define SPH_JH_64 1
|
||||
#endif
|
||||
|
||||
#if !SPH_64
|
||||
#undef SPH_JH_64
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The internal bitslice representation may use either big-endian or
|
||||
* little-endian (true bitslice operations do not care about the bit
|
||||
* ordering, and the bit-swapping linear operations in JH happen to
|
||||
* be invariant through endianness-swapping). The constants must be
|
||||
* defined according to the chosen endianness; we use some
|
||||
* byte-swapping macros for that.
|
||||
*/
|
||||
|
||||
#if SPH_LITTLE_ENDIAN
|
||||
|
||||
#if SPH_64
|
||||
#define C64e(x) ((SPH_C64(x) >> 56) \
|
||||
| ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
|
||||
| ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
|
||||
| ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \
|
||||
| ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \
|
||||
| ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
|
||||
| ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
|
||||
| ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
|
||||
#define dec64e_aligned sph_dec64le_aligned
|
||||
#define enc64e sph_enc64le
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if SPH_64
|
||||
#define C64e(x) SPH_C64(x)
|
||||
#define dec64e_aligned sph_dec64be_aligned
|
||||
#define enc64e sph_enc64be
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#define Sb(x0, x1, x2, x3, c) \
|
||||
do { \
|
||||
__m256i cc = _mm256_set_epi64x( c, c, c, c ); \
|
||||
x3 = mm256_not( x3 ); \
|
||||
x0 = _mm256_xor_si256( x0, _mm256_andnot_si256( x2, cc ) ); \
|
||||
tmp = _mm256_xor_si256( cc, _mm256_and_si256( x0, x1 ) ); \
|
||||
x0 = _mm256_xor_si256( x0, _mm256_and_si256( x2, x3 ) ); \
|
||||
x3 = _mm256_xor_si256( x3, _mm256_andnot_si256( x1, x2 ) ); \
|
||||
x1 = _mm256_xor_si256( x1, _mm256_and_si256( x0, x2 ) ); \
|
||||
x2 = _mm256_xor_si256( x2, _mm256_andnot_si256( x3, x0 ) ); \
|
||||
x0 = _mm256_xor_si256( x0, _mm256_or_si256( x1, x3 ) ); \
|
||||
x3 = _mm256_xor_si256( x3, _mm256_and_si256( x1, x2 ) ); \
|
||||
x1 = _mm256_xor_si256( x1, _mm256_and_si256( tmp, x0 ) ); \
|
||||
x2 = _mm256_xor_si256( x2, tmp ); \
|
||||
} while (0)
|
||||
|
||||
#define Lb(x0, x1, x2, x3, x4, x5, x6, x7) \
|
||||
do { \
|
||||
x4 = _mm256_xor_si256( x4, x1 ); \
|
||||
x5 = _mm256_xor_si256( x5, x2 ); \
|
||||
x6 = _mm256_xor_si256( x6, _mm256_xor_si256( x3, x0 ) ); \
|
||||
x7 = _mm256_xor_si256( x7, x0 ); \
|
||||
x0 = _mm256_xor_si256( x0, x5 ); \
|
||||
x1 = _mm256_xor_si256( x1, x6 ); \
|
||||
x2 = _mm256_xor_si256( x2, _mm256_xor_si256( x7, x4 ) ); \
|
||||
x3 = _mm256_xor_si256( x3, x4 ); \
|
||||
} while (0)
|
||||
|
||||
#if SPH_JH_64
|
||||
|
||||
static const sph_u64 C[] = {
|
||||
C64e(0x72d5dea2df15f867), C64e(0x7b84150ab7231557),
|
||||
C64e(0x81abd6904d5a87f6), C64e(0x4e9f4fc5c3d12b40),
|
||||
C64e(0xea983ae05c45fa9c), C64e(0x03c5d29966b2999a),
|
||||
C64e(0x660296b4f2bb538a), C64e(0xb556141a88dba231),
|
||||
C64e(0x03a35a5c9a190edb), C64e(0x403fb20a87c14410),
|
||||
C64e(0x1c051980849e951d), C64e(0x6f33ebad5ee7cddc),
|
||||
C64e(0x10ba139202bf6b41), C64e(0xdc786515f7bb27d0),
|
||||
C64e(0x0a2c813937aa7850), C64e(0x3f1abfd2410091d3),
|
||||
C64e(0x422d5a0df6cc7e90), C64e(0xdd629f9c92c097ce),
|
||||
C64e(0x185ca70bc72b44ac), C64e(0xd1df65d663c6fc23),
|
||||
C64e(0x976e6c039ee0b81a), C64e(0x2105457e446ceca8),
|
||||
C64e(0xeef103bb5d8e61fa), C64e(0xfd9697b294838197),
|
||||
C64e(0x4a8e8537db03302f), C64e(0x2a678d2dfb9f6a95),
|
||||
C64e(0x8afe7381f8b8696c), C64e(0x8ac77246c07f4214),
|
||||
C64e(0xc5f4158fbdc75ec4), C64e(0x75446fa78f11bb80),
|
||||
C64e(0x52de75b7aee488bc), C64e(0x82b8001e98a6a3f4),
|
||||
C64e(0x8ef48f33a9a36315), C64e(0xaa5f5624d5b7f989),
|
||||
C64e(0xb6f1ed207c5ae0fd), C64e(0x36cae95a06422c36),
|
||||
C64e(0xce2935434efe983d), C64e(0x533af974739a4ba7),
|
||||
C64e(0xd0f51f596f4e8186), C64e(0x0e9dad81afd85a9f),
|
||||
C64e(0xa7050667ee34626a), C64e(0x8b0b28be6eb91727),
|
||||
C64e(0x47740726c680103f), C64e(0xe0a07e6fc67e487b),
|
||||
C64e(0x0d550aa54af8a4c0), C64e(0x91e3e79f978ef19e),
|
||||
C64e(0x8676728150608dd4), C64e(0x7e9e5a41f3e5b062),
|
||||
C64e(0xfc9f1fec4054207a), C64e(0xe3e41a00cef4c984),
|
||||
C64e(0x4fd794f59dfa95d8), C64e(0x552e7e1124c354a5),
|
||||
C64e(0x5bdf7228bdfe6e28), C64e(0x78f57fe20fa5c4b2),
|
||||
C64e(0x05897cefee49d32e), C64e(0x447e9385eb28597f),
|
||||
C64e(0x705f6937b324314a), C64e(0x5e8628f11dd6e465),
|
||||
C64e(0xc71b770451b920e7), C64e(0x74fe43e823d4878a),
|
||||
C64e(0x7d29e8a3927694f2), C64e(0xddcb7a099b30d9c1),
|
||||
C64e(0x1d1b30fb5bdc1be0), C64e(0xda24494ff29c82bf),
|
||||
C64e(0xa4e7ba31b470bfff), C64e(0x0d324405def8bc48),
|
||||
C64e(0x3baefc3253bbd339), C64e(0x459fc3c1e0298ba0),
|
||||
C64e(0xe5c905fdf7ae090f), C64e(0x947034124290f134),
|
||||
C64e(0xa271b701e344ed95), C64e(0xe93b8e364f2f984a),
|
||||
C64e(0x88401d63a06cf615), C64e(0x47c1444b8752afff),
|
||||
C64e(0x7ebb4af1e20ac630), C64e(0x4670b6c5cc6e8ce6),
|
||||
C64e(0xa4d5a456bd4fca00), C64e(0xda9d844bc83e18ae),
|
||||
C64e(0x7357ce453064d1ad), C64e(0xe8a6ce68145c2567),
|
||||
C64e(0xa3da8cf2cb0ee116), C64e(0x33e906589a94999a),
|
||||
C64e(0x1f60b220c26f847b), C64e(0xd1ceac7fa0d18518),
|
||||
C64e(0x32595ba18ddd19d3), C64e(0x509a1cc0aaa5b446),
|
||||
C64e(0x9f3d6367e4046bba), C64e(0xf6ca19ab0b56ee7e),
|
||||
C64e(0x1fb179eaa9282174), C64e(0xe9bdf7353b3651ee),
|
||||
C64e(0x1d57ac5a7550d376), C64e(0x3a46c2fea37d7001),
|
||||
C64e(0xf735c1af98a4d842), C64e(0x78edec209e6b6779),
|
||||
C64e(0x41836315ea3adba8), C64e(0xfac33b4d32832c83),
|
||||
C64e(0xa7403b1f1c2747f3), C64e(0x5940f034b72d769a),
|
||||
C64e(0xe73e4e6cd2214ffd), C64e(0xb8fd8d39dc5759ef),
|
||||
C64e(0x8d9b0c492b49ebda), C64e(0x5ba2d74968f3700d),
|
||||
C64e(0x7d3baed07a8d5584), C64e(0xf5a5e9f0e4f88e65),
|
||||
C64e(0xa0b8a2f436103b53), C64e(0x0ca8079e753eec5a),
|
||||
C64e(0x9168949256e8884f), C64e(0x5bb05c55f8babc4c),
|
||||
C64e(0xe3bb3b99f387947b), C64e(0x75daf4d6726b1c5d),
|
||||
C64e(0x64aeac28dc34b36d), C64e(0x6c34a550b828db71),
|
||||
C64e(0xf861e2f2108d512a), C64e(0xe3db643359dd75fc),
|
||||
C64e(0x1cacbcf143ce3fa2), C64e(0x67bbd13c02e843b0),
|
||||
C64e(0x330a5bca8829a175), C64e(0x7f34194db416535c),
|
||||
C64e(0x923b94c30e794d1e), C64e(0x797475d7b6eeaf3f),
|
||||
C64e(0xeaa8d4f7be1a3921), C64e(0x5cf47e094c232751),
|
||||
C64e(0x26a32453ba323cd2), C64e(0x44a3174a6da6d5ad),
|
||||
C64e(0xb51d3ea6aff2c908), C64e(0x83593d98916b3c56),
|
||||
C64e(0x4cf87ca17286604d), C64e(0x46e23ecc086ec7f6),
|
||||
C64e(0x2f9833b3b1bc765e), C64e(0x2bd666a5efc4e62a),
|
||||
C64e(0x06f4b6e8bec1d436), C64e(0x74ee8215bcef2163),
|
||||
C64e(0xfdc14e0df453c969), C64e(0xa77d5ac406585826),
|
||||
C64e(0x7ec1141606e0fa16), C64e(0x7e90af3d28639d3f),
|
||||
C64e(0xd2c9f2e3009bd20c), C64e(0x5faace30b7d40c30),
|
||||
C64e(0x742a5116f2e03298), C64e(0x0deb30d8e3cef89a),
|
||||
C64e(0x4bc59e7bb5f17992), C64e(0xff51e66e048668d3),
|
||||
C64e(0x9b234d57e6966731), C64e(0xcce6a6f3170a7505),
|
||||
C64e(0xb17681d913326cce), C64e(0x3c175284f805a262),
|
||||
C64e(0xf42bcbb378471547), C64e(0xff46548223936a48),
|
||||
C64e(0x38df58074e5e6565), C64e(0xf2fc7c89fc86508e),
|
||||
C64e(0x31702e44d00bca86), C64e(0xf04009a23078474e),
|
||||
C64e(0x65a0ee39d1f73883), C64e(0xf75ee937e42c3abd),
|
||||
C64e(0x2197b2260113f86f), C64e(0xa344edd1ef9fdee7),
|
||||
C64e(0x8ba0df15762592d9), C64e(0x3c85f7f612dc42be),
|
||||
C64e(0xd8a7ec7cab27b07e), C64e(0x538d7ddaaa3ea8de),
|
||||
C64e(0xaa25ce93bd0269d8), C64e(0x5af643fd1a7308f9),
|
||||
C64e(0xc05fefda174a19a5), C64e(0x974d66334cfd216a),
|
||||
C64e(0x35b49831db411570), C64e(0xea1e0fbbedcd549b),
|
||||
C64e(0x9ad063a151974072), C64e(0xf6759dbf91476fe2)
|
||||
};
|
||||
|
||||
#define Ceven_hi(r) (C[((r) << 2) + 0])
|
||||
#define Ceven_lo(r) (C[((r) << 2) + 1])
|
||||
#define Codd_hi(r) (C[((r) << 2) + 2])
|
||||
#define Codd_lo(r) (C[((r) << 2) + 3])
|
||||
|
||||
#define S(x0, x1, x2, x3, cb, r) do { \
|
||||
Sb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, cb ## hi(r)); \
|
||||
Sb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, cb ## lo(r)); \
|
||||
} while (0)
|
||||
|
||||
#define L(x0, x1, x2, x3, x4, x5, x6, x7) do { \
|
||||
Lb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, \
|
||||
x4 ## h, x5 ## h, x6 ## h, x7 ## h); \
|
||||
Lb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, \
|
||||
x4 ## l, x5 ## l, x6 ## l, x7 ## l); \
|
||||
} while (0)
|
||||
|
||||
|
||||
#define Wz(x, c, n) \
|
||||
do { \
|
||||
__m256i t = _mm256_slli_epi64( _mm256_and_si256(x ## h, (c)), (n) ); \
|
||||
x ## h = _mm256_or_si256( _mm256_and_si256( \
|
||||
_mm256_srli_epi64(x ## h, (n)), (c)), t ); \
|
||||
t = _mm256_slli_epi64( _mm256_and_si256(x ## l, (c)), (n) ); \
|
||||
x ## l = _mm256_or_si256( _mm256_and_si256((x ## l >> (n)), (c)), t ); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/*
|
||||
#define Wz(x, c, n) do { \
|
||||
sph_u64 t = (x ## h & (c)) << (n); \
|
||||
x ## h = ((x ## h >> (n)) & (c)) | t; \
|
||||
t = (x ## l & (c)) << (n); \
|
||||
x ## l = ((x ## l >> (n)) & (c)) | t; \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
#define W0(x) Wz(x, _mm256_set_epi64x( 0x5555555555555555, \
|
||||
0x5555555555555555, 0x5555555555555555, 0x5555555555555555 ), 1 )
|
||||
#define W1(x) Wz(x, _mm256_set_epi64x( 0x3333333333333333, \
|
||||
0x3333333333333333, 0x3333333333333333, 0x3333333333333333 ), 2 )
|
||||
#define W2(x) Wz(x, _mm256_set_epi64x( 0x0F0F0F0F0F0F0F0F, \
|
||||
0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F ), 4 )
|
||||
#define W3(x) Wz(x, _mm256_set_epi64x( 0x00FF00FF00FF00FF, \
|
||||
0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF ), 8 )
|
||||
#define W4(x) Wz(x, _mm256_set_epi64x( 0x0000FFFF0000FFFF, \
|
||||
0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF ), 16 )
|
||||
#define W5(x) Wz(x, _mm256_set_epi64x( 0x00000000FFFFFFFF, \
|
||||
0x00000000FFFFFFFF, 0x00000000FFFFFFFF, 0x00000000FFFFFFFF ), 32 )
|
||||
#define W6(x) \
|
||||
do { \
|
||||
__m256i t = x ## h; \
|
||||
x ## h = x ## l; \
|
||||
x ## l = t; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
#define W0(x) Wz(x, SPH_C64(0x5555555555555555), 1)
|
||||
#define W1(x) Wz(x, SPH_C64(0x3333333333333333), 2)
|
||||
#define W2(x) Wz(x, SPH_C64(0x0F0F0F0F0F0F0F0F), 4)
|
||||
#define W3(x) Wz(x, SPH_C64(0x00FF00FF00FF00FF), 8)
|
||||
#define W4(x) Wz(x, SPH_C64(0x0000FFFF0000FFFF), 16)
|
||||
#define W5(x) Wz(x, SPH_C64(0x00000000FFFFFFFF), 32)
|
||||
#define W6(x) do { \
|
||||
sph_u64 t = x ## h; \
|
||||
x ## h = x ## l; \
|
||||
x ## l = t; \
|
||||
} while (0)
|
||||
*/
|
||||
|
||||
#define DECL_STATE \
|
||||
__m256i h0h, h1h, h2h, h3h, h4h, h5h, h6h, h7h; \
|
||||
__m256i h0l, h1l, h2l, h3l, h4l, h5l, h6l, h7l; \
|
||||
__m256i tmp;
|
||||
|
||||
#define READ_STATE(state) do { \
|
||||
h0h = (state)->H[ 0]; \
|
||||
h0l = (state)->H[ 1]; \
|
||||
h1h = (state)->H[ 2]; \
|
||||
h1l = (state)->H[ 3]; \
|
||||
h2h = (state)->H[ 4]; \
|
||||
h2l = (state)->H[ 5]; \
|
||||
h3h = (state)->H[ 6]; \
|
||||
h3l = (state)->H[ 7]; \
|
||||
h4h = (state)->H[ 8]; \
|
||||
h4l = (state)->H[ 9]; \
|
||||
h5h = (state)->H[10]; \
|
||||
h5l = (state)->H[11]; \
|
||||
h6h = (state)->H[12]; \
|
||||
h6l = (state)->H[13]; \
|
||||
h7h = (state)->H[14]; \
|
||||
h7l = (state)->H[15]; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE(state) do { \
|
||||
(state)->H[ 0] = h0h; \
|
||||
(state)->H[ 1] = h0l; \
|
||||
(state)->H[ 2] = h1h; \
|
||||
(state)->H[ 3] = h1l; \
|
||||
(state)->H[ 4] = h2h; \
|
||||
(state)->H[ 5] = h2l; \
|
||||
(state)->H[ 6] = h3h; \
|
||||
(state)->H[ 7] = h3l; \
|
||||
(state)->H[ 8] = h4h; \
|
||||
(state)->H[ 9] = h4l; \
|
||||
(state)->H[10] = h5h; \
|
||||
(state)->H[11] = h5l; \
|
||||
(state)->H[12] = h6h; \
|
||||
(state)->H[13] = h6l; \
|
||||
(state)->H[14] = h7h; \
|
||||
(state)->H[15] = h7l; \
|
||||
} while (0)
|
||||
|
||||
#define INPUT_BUF1 \
|
||||
__m256i m0h = buf[0]; \
|
||||
__m256i m0l = buf[1]; \
|
||||
__m256i m1h = buf[2]; \
|
||||
__m256i m1l = buf[3]; \
|
||||
__m256i m2h = buf[4]; \
|
||||
__m256i m2l = buf[5]; \
|
||||
__m256i m3h = buf[6]; \
|
||||
__m256i m3l = buf[7]; \
|
||||
h0h = _mm256_xor_si256( h0h, m0h ); \
|
||||
h0l = _mm256_xor_si256( h0l, m0l ); \
|
||||
h1h = _mm256_xor_si256( h1h, m1h ); \
|
||||
h1l = _mm256_xor_si256( h1l, m1l ); \
|
||||
h2h = _mm256_xor_si256( h2h, m2h ); \
|
||||
h2l = _mm256_xor_si256( h2l, m2l ); \
|
||||
h3h = _mm256_xor_si256( h3h, m3h ); \
|
||||
h3l = _mm256_xor_si256( h3l, m3l ); \
|
||||
|
||||
#define INPUT_BUF2 \
|
||||
h4h = _mm256_xor_si256( h4h, m0h ); \
|
||||
h4l = _mm256_xor_si256( h4l, m0l ); \
|
||||
h5h = _mm256_xor_si256( h5h, m1h ); \
|
||||
h5l = _mm256_xor_si256( h5l, m1l ); \
|
||||
h6h = _mm256_xor_si256( h6h, m2h ); \
|
||||
h6l = _mm256_xor_si256( h6l, m2l ); \
|
||||
h7h = _mm256_xor_si256( h7h, m3h ); \
|
||||
h7l = _mm256_xor_si256( h7l, m3l ); \
|
||||
|
||||
static const sph_u64 IV256[] = {
|
||||
C64e(0xeb98a3412c20d3eb), C64e(0x92cdbe7b9cb245c1),
|
||||
C64e(0x1c93519160d4c7fa), C64e(0x260082d67e508a03),
|
||||
C64e(0xa4239e267726b945), C64e(0xe0fb1a48d41a9477),
|
||||
C64e(0xcdb5ab26026b177a), C64e(0x56f024420fff2fa8),
|
||||
C64e(0x71a396897f2e4d75), C64e(0x1d144908f77de262),
|
||||
C64e(0x277695f776248f94), C64e(0x87d5b6574780296c),
|
||||
C64e(0x5c5e272dac8e0d6c), C64e(0x518450c657057a0f),
|
||||
C64e(0x7be4d367702412ea), C64e(0x89e3ab13d31cd769)
|
||||
};
|
||||
|
||||
|
||||
static const sph_u64 IV512[] = {
|
||||
C64e(0x6fd14b963e00aa17), C64e(0x636a2e057a15d543),
|
||||
C64e(0x8a225e8d0c97ef0b), C64e(0xe9341259f2b3c361),
|
||||
C64e(0x891da0c1536f801e), C64e(0x2aa9056bea2b6d80),
|
||||
C64e(0x588eccdb2075baa6), C64e(0xa90f3a76baf83bf7),
|
||||
C64e(0x0169e60541e34a69), C64e(0x46b58a8e2e6fe65a),
|
||||
C64e(0x1047a7d0c1843c24), C64e(0x3b6e71b12d5ac199),
|
||||
C64e(0xcf57f6ec9db1f856), C64e(0xa706887c5716b156),
|
||||
C64e(0xe3c2fcdfe68517fb), C64e(0x545a4678cc8cdd4b)
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#define SL(ro) SLu(r + ro, ro)
|
||||
|
||||
#define SLu(r, ro) do { \
|
||||
S(h0, h2, h4, h6, Ceven_, r); \
|
||||
S(h1, h3, h5, h7, Codd_, r); \
|
||||
L(h0, h2, h4, h6, h1, h3, h5, h7); \
|
||||
W ## ro(h1); \
|
||||
W ## ro(h3); \
|
||||
W ## ro(h5); \
|
||||
W ## ro(h7); \
|
||||
} while (0)
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_JH
|
||||
|
||||
#if SPH_JH_64
|
||||
|
||||
/*
|
||||
* The "small footprint" 64-bit version just uses a partially unrolled
|
||||
* loop.
|
||||
*/
|
||||
|
||||
#define E8 do { \
|
||||
unsigned r; \
|
||||
for (r = 0; r < 42; r += 7) { \
|
||||
SL(0); \
|
||||
SL(1); \
|
||||
SL(2); \
|
||||
SL(3); \
|
||||
SL(4); \
|
||||
SL(5); \
|
||||
SL(6); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if SPH_JH_64
|
||||
|
||||
/*
|
||||
* On a "true 64-bit" architecture, we can unroll at will.
|
||||
*/
|
||||
|
||||
#define E8 do { \
|
||||
SLu( 0, 0); \
|
||||
SLu( 1, 1); \
|
||||
SLu( 2, 2); \
|
||||
SLu( 3, 3); \
|
||||
SLu( 4, 4); \
|
||||
SLu( 5, 5); \
|
||||
SLu( 6, 6); \
|
||||
SLu( 7, 0); \
|
||||
SLu( 8, 1); \
|
||||
SLu( 9, 2); \
|
||||
SLu(10, 3); \
|
||||
SLu(11, 4); \
|
||||
SLu(12, 5); \
|
||||
SLu(13, 6); \
|
||||
SLu(14, 0); \
|
||||
SLu(15, 1); \
|
||||
SLu(16, 2); \
|
||||
SLu(17, 3); \
|
||||
SLu(18, 4); \
|
||||
SLu(19, 5); \
|
||||
SLu(20, 6); \
|
||||
SLu(21, 0); \
|
||||
SLu(22, 1); \
|
||||
SLu(23, 2); \
|
||||
SLu(24, 3); \
|
||||
SLu(25, 4); \
|
||||
SLu(26, 5); \
|
||||
SLu(27, 6); \
|
||||
SLu(28, 0); \
|
||||
SLu(29, 1); \
|
||||
SLu(30, 2); \
|
||||
SLu(31, 3); \
|
||||
SLu(32, 4); \
|
||||
SLu(33, 5); \
|
||||
SLu(34, 6); \
|
||||
SLu(35, 0); \
|
||||
SLu(36, 1); \
|
||||
SLu(37, 2); \
|
||||
SLu(38, 3); \
|
||||
SLu(39, 4); \
|
||||
SLu(40, 5); \
|
||||
SLu(41, 6); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
static void
|
||||
jh_4way_init( jh_4way_context *sc, const void *iv )
|
||||
{
|
||||
uint64_t *v = (uint64_t*)iv;
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
sc->H[i] = _mm256_set_epi64x( v[i], v[i], v[i], v[i] );
|
||||
sc->ptr = 0;
|
||||
sc->block_count = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
jh_4way_core( jh_4way_context *sc, const void *data, size_t len )
|
||||
{
|
||||
__m256i *buf;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
const int buf_size = 64; // 64 * _m256i
|
||||
size_t ptr;
|
||||
DECL_STATE
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
|
||||
if ( len < (buf_size - ptr) )
|
||||
{
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE(sc);
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
clen = buf_size - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata += (clen>>3);
|
||||
len -= clen;
|
||||
if ( ptr == buf_size )
|
||||
{
|
||||
INPUT_BUF1;
|
||||
E8;
|
||||
INPUT_BUF2;
|
||||
sc->block_count ++;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE(sc);
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
|
||||
size_t out_size_w32, const void *iv )
|
||||
{
|
||||
__m256i buf[16*4];
|
||||
__m256i *dst256 = (__m256i*)dst;
|
||||
size_t numz, u;
|
||||
sph_u64 l0, l1, l0e, l1e;
|
||||
|
||||
buf[0] = _mm256_set_epi64x( 0x80, 0x80, 0x80, 0x80 );
|
||||
|
||||
if ( sc->ptr == 0 )
|
||||
numz = 48;
|
||||
else
|
||||
numz = 112 - sc->ptr;
|
||||
|
||||
memset_zero_256( buf+1, (numz>>3) - 1 );
|
||||
|
||||
l0 = SPH_T64(sc->block_count << 9) + (sc->ptr << 3);
|
||||
l1 = SPH_T64(sc->block_count >> 55);
|
||||
sph_enc64be( &l0e, l0 );
|
||||
sph_enc64be( &l1e, l1 );
|
||||
*(buf + (numz>>3) ) = _mm256_set_epi64x( l1e, l1e, l1e, l1e );
|
||||
*(buf + (numz>>3) + 1) = _mm256_set_epi64x( l0e, l0e, l0e, l0e );
|
||||
|
||||
jh_4way_core( sc, buf, numz + 16 );
|
||||
|
||||
for ( u=0; u < 8; u++ )
|
||||
buf[u] = sc->H[u+8];
|
||||
|
||||
memcpy_256( dst256, buf, 8 );
|
||||
}
|
||||
|
||||
void
|
||||
jh256_4way_init(void *cc)
|
||||
{
|
||||
jh_4way_init(cc, IV256);
|
||||
}
|
||||
|
||||
void
|
||||
jh256_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
jh_4way_core(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
jh256_4way_close(void *cc, void *dst)
|
||||
{
|
||||
jh_4way_close(cc, 0, 0, dst, 8, IV256);
|
||||
}
|
||||
|
||||
void
|
||||
jh512_4way_init(void *cc)
|
||||
{
|
||||
jh_4way_init(cc, IV512);
|
||||
}
|
||||
|
||||
void
|
||||
jh512_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
jh_4way_core(cc, data, len);
|
||||
}
|
||||
|
||||
void
|
||||
jh512_4way_close(void *cc, void *dst)
|
||||
{
|
||||
jh_4way_close(cc, 0, 0, dst, 16, IV512);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
100
algo/jh/jh-hash-4way.h
Normal file
100
algo/jh/jh-hash-4way.h
Normal file
@@ -0,0 +1,100 @@
|
||||
/* $Id: sph_jh.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* JH interface. JH is a family of functions which differ by
|
||||
* their output size; this implementation defines JH for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_jh.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef JH_HASH_4WAY_H__
|
||||
#define JH_HASH_4WAY_H__
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
#define SPH_SIZE_jh256 256
|
||||
|
||||
#define SPH_SIZE_jh512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for JH computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a JH computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running JH computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
__m256i buf[8] __attribute__ ((aligned (64)));
|
||||
__m256i H[16];
|
||||
size_t ptr;
|
||||
uint64_t block_count;
|
||||
/*
|
||||
unsigned char buf[64];
|
||||
size_t ptr;
|
||||
union {
|
||||
sph_u64 wide[16];
|
||||
} H;
|
||||
sph_u64 block_count;
|
||||
*/
|
||||
} jh_4way_context;
|
||||
|
||||
typedef jh_4way_context jh256_4way_context;
|
||||
|
||||
typedef jh_4way_context jh512_4way_context;
|
||||
|
||||
void jh256_4way_init(void *cc);
|
||||
|
||||
void jh256_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void jh256_4way_close(void *cc, void *dst);
|
||||
|
||||
void jh512_4way_init(void *cc);
|
||||
|
||||
void jh512_4way(void *cc, const void *data, size_t len);
|
||||
|
||||
void jh512_4way_close(void *cc, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
154
algo/jh/jha-4way.c
Normal file
154
algo/jh/jha-4way.c
Normal file
@@ -0,0 +1,154 @@
|
||||
#include "jha-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
//#include "avxdefs.h"
|
||||
|
||||
#if defined(JHA_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/jh/jh-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
|
||||
//static __thread keccak512_4way_context jha_kec_mid
|
||||
// __attribute__ ((aligned (64)));
|
||||
|
||||
void jha_hash_4way( void *out, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint64_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashA[8*4] __attribute__ ((aligned (64)));
|
||||
uint64_t vhashB[8*4] __attribute__ ((aligned (64)));
|
||||
__m256i* vh = (__m256i*)vhash;
|
||||
__m256i* vhA = (__m256i*)vhashA;
|
||||
__m256i* vhB = (__m256i*)vhashB;
|
||||
__m256i vh_mask;
|
||||
|
||||
blake512_4way_context ctx_blake;
|
||||
hashState_groestl ctx_groestl;
|
||||
jh512_4way_context ctx_jh;
|
||||
skein512_4way_context ctx_skein;
|
||||
keccak512_4way_context ctx_keccak;
|
||||
|
||||
keccak512_4way_init( &ctx_keccak );
|
||||
keccak512_4way( &ctx_keccak, input, 80 );
|
||||
keccak512_4way_close( &ctx_keccak, vhash );
|
||||
|
||||
// Heavy & Light Pair Loop
|
||||
for ( int round = 0; round < 3; round++ )
|
||||
{
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256(
|
||||
vh[0], _mm256_set1_epi64x( 1 ) ), m256_zero );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash1,
|
||||
(char*)hash1, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash2,
|
||||
(char*)hash2, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
mm256_interleave_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
skein512_4way_init( &ctx_skein );
|
||||
skein512_4way( &ctx_skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx_skein, vhashB );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
|
||||
blake512_4way_init( &ctx_blake );
|
||||
blake512_4way( &ctx_blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx_blake, vhashA );
|
||||
|
||||
jh512_4way_init( &ctx_jh );
|
||||
jh512_4way( &ctx_jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx_jh, vhashB );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
}
|
||||
|
||||
mm256_deinterleave_4x64( out, out+32, out+64, out+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
for ( int m = 0; m < 6; m++ )
|
||||
{
|
||||
if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
jha_hash_4way( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( !( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
#endif
|
18
algo/jh/jha-gate.c
Normal file
18
algo/jh/jha-gate.c
Normal file
@@ -0,0 +1,18 @@
|
||||
#include "jha-gate.h"
|
||||
|
||||
|
||||
bool register_jha_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (JHA_4WAY)
|
||||
four_way_not_tested();
|
||||
gate->scanhash = (void*)&scanhash_jha_4way;
|
||||
gate->hash = (void*)&jha_hash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_jha;
|
||||
gate->hash = (void*)&jha_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
return true;
|
||||
};
|
||||
|
25
algo/jh/jha-gate.h
Normal file
25
algo/jh/jha-gate.h
Normal file
@@ -0,0 +1,25 @@
|
||||
#ifndef JHA_GATE_H__
|
||||
#define JHA_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define JHA_4WAY
|
||||
#endif
|
||||
|
||||
#if defined JHA_4WAY
|
||||
void jha_hash_4way( void *state, const void *input );
|
||||
|
||||
int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
void jha_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_jha( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
||||
|
155
algo/jh/jha.c
Normal file
155
algo/jh/jha.c
Normal file
@@ -0,0 +1,155 @@
|
||||
#include "jha-gate.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#else
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
static __thread sph_keccak512_context jha_kec_mid __attribute__ ((aligned (64)));
|
||||
|
||||
void jha_kec_midstate( const void* input )
|
||||
{
|
||||
sph_keccak512_init( &jha_kec_mid );
|
||||
sph_keccak512( &jha_kec_mid, input, 64 );
|
||||
}
|
||||
|
||||
void jha_hash(void *output, const void *input)
|
||||
{
|
||||
uint8_t _ALIGN(128) hash[64];
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_context ctx_groestl;
|
||||
#else
|
||||
hashState_groestl ctx_groestl;
|
||||
#endif
|
||||
sph_blake512_context ctx_blake;
|
||||
sph_jh512_context ctx_jh;
|
||||
sph_keccak512_context ctx_keccak;
|
||||
sph_skein512_context ctx_skein;
|
||||
|
||||
memcpy( &ctx_keccak, &jha_kec_mid, sizeof jha_kec_mid );
|
||||
sph_keccak512(&ctx_keccak, input+64, 16 );
|
||||
sph_keccak512_close(&ctx_keccak, hash );
|
||||
|
||||
// Heavy & Light Pair Loop
|
||||
for (int round = 0; round < 3; round++)
|
||||
{
|
||||
if (hash[0] & 0x01)
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_init(&ctx_groestl);
|
||||
sph_groestl512(&ctx_groestl, hash, 64 );
|
||||
sph_groestl512_close(&ctx_groestl, hash );
|
||||
#else
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash,
|
||||
(char*)hash, 512 );
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_skein512_init(&ctx_skein);
|
||||
sph_skein512(&ctx_skein, hash, 64);
|
||||
sph_skein512_close(&ctx_skein, hash );
|
||||
}
|
||||
|
||||
if (hash[0] & 0x01)
|
||||
{
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash );
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_jh512_init(&ctx_jh);
|
||||
sph_jh512(&ctx_jh, hash, 64 );
|
||||
sph_jh512_close(&ctx_jh, hash );
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_jha(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19] - 1;
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
for (int i=0; i < 19; i++) {
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
}
|
||||
|
||||
jha_kec_midstate( endiandata );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < 6; m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
jha_hash(hash32, endiandata);
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash32[7] & mask)) && fulltest(hash32, ptarget)) {
|
||||
work_set_target_ratio(work, hash32);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash32[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash32, ptarget)) {
|
||||
work_set_target_ratio(work, hash32);
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -914,6 +914,7 @@ jh_core(sph_jh_context *sc, const void *data, size_t len)
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
|
||||
if (len < (sizeof sc->buf) - ptr) {
|
||||
memcpy(buf + ptr, data, len);
|
||||
ptr += len;
|
||||
|
@@ -22,15 +22,12 @@
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "algo/sha/sha3-defs.h"
|
||||
|
||||
typedef __m128i word128; /*word128 defines a 128-bit SSE2 word*/
|
||||
|
||||
typedef unsigned char BitSequence;
|
||||
typedef unsigned long long DataLength;
|
||||
typedef enum {jhSUCCESS = 0, jhFAIL = 1, jhBAD_HASHLEN = 2} jhReturn;
|
||||
|
||||
/*define data alignment for different C compilers*/
|
||||
@@ -342,13 +339,13 @@ do { \
|
||||
jhSbuffer[53] = 0x00, \
|
||||
jhSbuffer[54] = 0x00, \
|
||||
jhSbuffer[55] = 0x00; \
|
||||
jhSbuffer[56] = ((64*8) >> 56) & 0xff, \
|
||||
jhSbuffer[57] = ((64*8) >> 48) & 0xff, \
|
||||
jhSbuffer[58] = ((64*8) >> 40) & 0xff, \
|
||||
jhSbuffer[59] = ((64*8) >> 32) & 0xff, \
|
||||
jhSbuffer[60] = ((64*8) >> 24) & 0xff, \
|
||||
jhSbuffer[61] = ((64*8) >> 16) & 0xff, \
|
||||
jhSbuffer[62] = ((64*8) >> 8) & 0xff, \
|
||||
jhSbuffer[56] = ((char)((uint64_t)(64*8) >> 56)) & 0xff, \
|
||||
jhSbuffer[57] = ((char)((uint64_t)(64*8) >> 48)) & 0xff, \
|
||||
jhSbuffer[58] = ((char)((uint64_t)(64*8) >> 40)) & 0xff, \
|
||||
jhSbuffer[59] = ((char)((uint64_t)(64*8) >> 32)) & 0xff, \
|
||||
jhSbuffer[60] = ((char)((uint64_t)(64*8) >> 24)) & 0xff, \
|
||||
jhSbuffer[61] = ((char)((uint64_t)(64*8) >> 16)) & 0xff, \
|
||||
jhSbuffer[62] = ((char)((uint64_t)(64*8) >> 8)) & 0xff, \
|
||||
jhSbuffer[63] = (64*8) & 0xff; \
|
||||
b = true; \
|
||||
} \
|
||||
|
68
algo/keccak/keccak-4way.c
Normal file
68
algo/keccak/keccak-4way.c
Normal file
@@ -0,0 +1,68 @@
|
||||
#include "keccak-gate.h"
|
||||
|
||||
#ifdef KECCAK_4WAY
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_keccak.h"
|
||||
#include "keccak-hash-4way.h"
|
||||
|
||||
void keccakhash_4way(void *state, const void *input)
|
||||
{
|
||||
uint64_t vhash[4*4] __attribute__ ((aligned (64)));
|
||||
keccak256_4way_context ctx;
|
||||
|
||||
keccak256_4way_init( &ctx );
|
||||
keccak256_4way( &ctx, input, 80 );
|
||||
keccak256_4way_close( &ctx, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
// const uint32_t Htarg = ptarget[7];
|
||||
uint32_t endiandata[20];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
|
||||
keccakhash_4way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( ( (hash+(i<<3))[7] & 0xFFFFFF00 ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
46
algo/keccak/keccak-gate.c
Normal file
46
algo/keccak/keccak-gate.c
Normal file
@@ -0,0 +1,46 @@
|
||||
#include "keccak-gate.h"
|
||||
|
||||
void keccak_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (128.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
int64_t keccak_get_max64() { return 0x7ffffLL; }
|
||||
|
||||
bool register_keccak_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
gate->set_target = (void*)&keccak_set_target;
|
||||
gate->get_max64 = (void*)&keccak_get_max64;
|
||||
#if defined (KECCAK_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_keccak_4way;
|
||||
gate->hash = (void*)&keccakhash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_keccak;
|
||||
gate->hash = (void*)&keccakhash;
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
|
||||
void keccakc_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_keccakc_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT;
|
||||
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
|
||||
gate->set_target = (void*)&keccakc_set_target;
|
||||
gate->get_max64 = (void*)&keccak_get_max64;
|
||||
#if defined (KECCAK_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_keccak_4way;
|
||||
gate->hash = (void*)&keccakhash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_keccak;
|
||||
gate->hash = (void*)&keccakhash;
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
|
23
algo/keccak/keccak-gate.h
Normal file
23
algo/keccak/keccak-gate.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#ifndef KECCAK_GATE_H__
|
||||
#define KECCAK_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define KECCAK_4WAY
|
||||
#endif
|
||||
|
||||
#if defined(KECCAK_4WAY)
|
||||
|
||||
void keccakhash_4way( void *state, const void *input );
|
||||
int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
||||
|
||||
void keccakhash( void *state, const void *input );
|
||||
int scanhash_keccak( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
|
||||
#endif
|
503
algo/keccak/keccak-hash-4way.c
Normal file
503
algo/keccak/keccak-hash-4way.c
Normal file
@@ -0,0 +1,503 @@
|
||||
#include <stddef.h>
|
||||
#include "keccak-hash-4way.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
static const sph_u64 RC[] = {
|
||||
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
|
||||
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
|
||||
SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001),
|
||||
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009),
|
||||
SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088),
|
||||
SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A),
|
||||
SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B),
|
||||
SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003),
|
||||
SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080),
|
||||
SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A),
|
||||
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
|
||||
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
|
||||
};
|
||||
|
||||
#define a00 (kc->w[ 0])
|
||||
#define a10 (kc->w[ 1])
|
||||
#define a20 (kc->w[ 2])
|
||||
#define a30 (kc->w[ 3])
|
||||
#define a40 (kc->w[ 4])
|
||||
#define a01 (kc->w[ 5])
|
||||
#define a11 (kc->w[ 6])
|
||||
#define a21 (kc->w[ 7])
|
||||
#define a31 (kc->w[ 8])
|
||||
#define a41 (kc->w[ 9])
|
||||
#define a02 (kc->w[10])
|
||||
#define a12 (kc->w[11])
|
||||
#define a22 (kc->w[12])
|
||||
#define a32 (kc->w[13])
|
||||
#define a42 (kc->w[14])
|
||||
#define a03 (kc->w[15])
|
||||
#define a13 (kc->w[16])
|
||||
#define a23 (kc->w[17])
|
||||
#define a33 (kc->w[18])
|
||||
#define a43 (kc->w[19])
|
||||
#define a04 (kc->w[20])
|
||||
#define a14 (kc->w[21])
|
||||
#define a24 (kc->w[22])
|
||||
#define a34 (kc->w[23])
|
||||
#define a44 (kc->w[24])
|
||||
|
||||
#define DECL_STATE
|
||||
#define READ_STATE(sc)
|
||||
#define WRITE_STATE(sc)
|
||||
|
||||
#define INPUT_BUF(size) do { \
|
||||
size_t j; \
|
||||
for (j = 0; j < (size>>3); j++ ) \
|
||||
kc->w[j ] = _mm256_xor_si256( kc->w[j], buf[j] ); \
|
||||
} while (0)
|
||||
|
||||
#define DECL64(x) __m256i x
|
||||
#define MOV64(d, s) (d = s)
|
||||
#define XOR64(d, a, b) (d = _mm256_xor_si256(a,b))
|
||||
#define AND64(d, a, b) (d = _mm256_and_si256(a,b))
|
||||
#define OR64(d, a, b) (d = _mm256_or_si256(a,b))
|
||||
#define NOT64(d, s) (d = _mm256_xor_si256(s,m256_neg1))
|
||||
#define ROL64(d, v, n) (d = mm256_rotl_64(v, n))
|
||||
#define XOR64_IOTA XOR64
|
||||
|
||||
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
|
||||
DECL64(tt0); \
|
||||
DECL64(tt1); \
|
||||
DECL64(tt2); \
|
||||
DECL64(tt3); \
|
||||
XOR64(tt0, d0, d1); \
|
||||
XOR64(tt1, d2, d3); \
|
||||
XOR64(tt0, tt0, d4); \
|
||||
XOR64(tt0, tt0, tt1); \
|
||||
ROL64(tt0, tt0, 1); \
|
||||
XOR64(tt2, c0, c1); \
|
||||
XOR64(tt3, c2, c3); \
|
||||
XOR64(tt0, tt0, c4); \
|
||||
XOR64(tt2, tt2, tt3); \
|
||||
XOR64(t, tt0, tt2); \
|
||||
} while (0)
|
||||
|
||||
#define THETA(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
|
||||
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
|
||||
b40, b41, b42, b43, b44) \
|
||||
do { \
|
||||
DECL64(t0); \
|
||||
DECL64(t1); \
|
||||
DECL64(t2); \
|
||||
DECL64(t3); \
|
||||
DECL64(t4); \
|
||||
TH_ELT(t0, b40, b41, b42, b43, b44, b10, b11, b12, b13, b14); \
|
||||
TH_ELT(t1, b00, b01, b02, b03, b04, b20, b21, b22, b23, b24); \
|
||||
TH_ELT(t2, b10, b11, b12, b13, b14, b30, b31, b32, b33, b34); \
|
||||
TH_ELT(t3, b20, b21, b22, b23, b24, b40, b41, b42, b43, b44); \
|
||||
TH_ELT(t4, b30, b31, b32, b33, b34, b00, b01, b02, b03, b04); \
|
||||
XOR64(b00, b00, t0); \
|
||||
XOR64(b01, b01, t0); \
|
||||
XOR64(b02, b02, t0); \
|
||||
XOR64(b03, b03, t0); \
|
||||
XOR64(b04, b04, t0); \
|
||||
XOR64(b10, b10, t1); \
|
||||
XOR64(b11, b11, t1); \
|
||||
XOR64(b12, b12, t1); \
|
||||
XOR64(b13, b13, t1); \
|
||||
XOR64(b14, b14, t1); \
|
||||
XOR64(b20, b20, t2); \
|
||||
XOR64(b21, b21, t2); \
|
||||
XOR64(b22, b22, t2); \
|
||||
XOR64(b23, b23, t2); \
|
||||
XOR64(b24, b24, t2); \
|
||||
XOR64(b30, b30, t3); \
|
||||
XOR64(b31, b31, t3); \
|
||||
XOR64(b32, b32, t3); \
|
||||
XOR64(b33, b33, t3); \
|
||||
XOR64(b34, b34, t3); \
|
||||
XOR64(b40, b40, t4); \
|
||||
XOR64(b41, b41, t4); \
|
||||
XOR64(b42, b42, t4); \
|
||||
XOR64(b43, b43, t4); \
|
||||
XOR64(b44, b44, t4); \
|
||||
} while (0)
|
||||
|
||||
#define RHO(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
|
||||
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
|
||||
b40, b41, b42, b43, b44) \
|
||||
do { \
|
||||
/* ROL64(b00, b00, 0); */ \
|
||||
ROL64(b01, b01, 36); \
|
||||
ROL64(b02, b02, 3); \
|
||||
ROL64(b03, b03, 41); \
|
||||
ROL64(b04, b04, 18); \
|
||||
ROL64(b10, b10, 1); \
|
||||
ROL64(b11, b11, 44); \
|
||||
ROL64(b12, b12, 10); \
|
||||
ROL64(b13, b13, 45); \
|
||||
ROL64(b14, b14, 2); \
|
||||
ROL64(b20, b20, 62); \
|
||||
ROL64(b21, b21, 6); \
|
||||
ROL64(b22, b22, 43); \
|
||||
ROL64(b23, b23, 15); \
|
||||
ROL64(b24, b24, 61); \
|
||||
ROL64(b30, b30, 28); \
|
||||
ROL64(b31, b31, 55); \
|
||||
ROL64(b32, b32, 25); \
|
||||
ROL64(b33, b33, 21); \
|
||||
ROL64(b34, b34, 56); \
|
||||
ROL64(b40, b40, 27); \
|
||||
ROL64(b41, b41, 20); \
|
||||
ROL64(b42, b42, 39); \
|
||||
ROL64(b43, b43, 8); \
|
||||
ROL64(b44, b44, 14); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* The KHI macro integrates the "lane complement" optimization. On input,
|
||||
* some words are complemented:
|
||||
* a00 a01 a02 a04 a13 a20 a21 a22 a30 a33 a34 a43
|
||||
* On output, the following words are complemented:
|
||||
* a04 a10 a20 a22 a23 a31
|
||||
*
|
||||
* The (implicit) permutation and the theta expansion will bring back
|
||||
* the input mask for the next round.
|
||||
*/
|
||||
|
||||
#define KHI_XO(d, a, b, c) do { \
|
||||
DECL64(kt); \
|
||||
OR64(kt, b, c); \
|
||||
XOR64(d, a, kt); \
|
||||
} while (0)
|
||||
|
||||
#define KHI_XA(d, a, b, c) do { \
|
||||
DECL64(kt); \
|
||||
AND64(kt, b, c); \
|
||||
XOR64(d, a, kt); \
|
||||
} while (0)
|
||||
|
||||
#define KHI(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
|
||||
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
|
||||
b40, b41, b42, b43, b44) \
|
||||
do { \
|
||||
DECL64(c0); \
|
||||
DECL64(c1); \
|
||||
DECL64(c2); \
|
||||
DECL64(c3); \
|
||||
DECL64(c4); \
|
||||
DECL64(bnn); \
|
||||
NOT64(bnn, b20); \
|
||||
KHI_XO(c0, b00, b10, b20); \
|
||||
KHI_XO(c1, b10, bnn, b30); \
|
||||
KHI_XA(c2, b20, b30, b40); \
|
||||
KHI_XO(c3, b30, b40, b00); \
|
||||
KHI_XA(c4, b40, b00, b10); \
|
||||
MOV64(b00, c0); \
|
||||
MOV64(b10, c1); \
|
||||
MOV64(b20, c2); \
|
||||
MOV64(b30, c3); \
|
||||
MOV64(b40, c4); \
|
||||
NOT64(bnn, b41); \
|
||||
KHI_XO(c0, b01, b11, b21); \
|
||||
KHI_XA(c1, b11, b21, b31); \
|
||||
KHI_XO(c2, b21, b31, bnn); \
|
||||
KHI_XO(c3, b31, b41, b01); \
|
||||
KHI_XA(c4, b41, b01, b11); \
|
||||
MOV64(b01, c0); \
|
||||
MOV64(b11, c1); \
|
||||
MOV64(b21, c2); \
|
||||
MOV64(b31, c3); \
|
||||
MOV64(b41, c4); \
|
||||
NOT64(bnn, b32); \
|
||||
KHI_XO(c0, b02, b12, b22); \
|
||||
KHI_XA(c1, b12, b22, b32); \
|
||||
KHI_XA(c2, b22, bnn, b42); \
|
||||
KHI_XO(c3, bnn, b42, b02); \
|
||||
KHI_XA(c4, b42, b02, b12); \
|
||||
MOV64(b02, c0); \
|
||||
MOV64(b12, c1); \
|
||||
MOV64(b22, c2); \
|
||||
MOV64(b32, c3); \
|
||||
MOV64(b42, c4); \
|
||||
NOT64(bnn, b33); \
|
||||
KHI_XA(c0, b03, b13, b23); \
|
||||
KHI_XO(c1, b13, b23, b33); \
|
||||
KHI_XO(c2, b23, bnn, b43); \
|
||||
KHI_XA(c3, bnn, b43, b03); \
|
||||
KHI_XO(c4, b43, b03, b13); \
|
||||
MOV64(b03, c0); \
|
||||
MOV64(b13, c1); \
|
||||
MOV64(b23, c2); \
|
||||
MOV64(b33, c3); \
|
||||
MOV64(b43, c4); \
|
||||
NOT64(bnn, b14); \
|
||||
KHI_XA(c0, b04, bnn, b24); \
|
||||
KHI_XO(c1, bnn, b24, b34); \
|
||||
KHI_XA(c2, b24, b34, b44); \
|
||||
KHI_XO(c3, b34, b44, b04); \
|
||||
KHI_XA(c4, b44, b04, b14); \
|
||||
MOV64(b04, c0); \
|
||||
MOV64(b14, c1); \
|
||||
MOV64(b24, c2); \
|
||||
MOV64(b34, c3); \
|
||||
MOV64(b44, c4); \
|
||||
} while (0)
|
||||
|
||||
#define IOTA(r) XOR64_IOTA(a00, a00, r)
|
||||
|
||||
#define P0 a00, a01, a02, a03, a04, a10, a11, a12, a13, a14, a20, a21, \
|
||||
a22, a23, a24, a30, a31, a32, a33, a34, a40, a41, a42, a43, a44
|
||||
#define P1 a00, a30, a10, a40, a20, a11, a41, a21, a01, a31, a22, a02, \
|
||||
a32, a12, a42, a33, a13, a43, a23, a03, a44, a24, a04, a34, a14
|
||||
#define P2 a00, a33, a11, a44, a22, a41, a24, a02, a30, a13, a32, a10, \
|
||||
a43, a21, a04, a23, a01, a34, a12, a40, a14, a42, a20, a03, a31
|
||||
#define P3 a00, a23, a41, a14, a32, a24, a42, a10, a33, a01, a43, a11, \
|
||||
a34, a02, a20, a12, a30, a03, a21, a44, a31, a04, a22, a40, a13
|
||||
#define P4 a00, a12, a24, a31, a43, a42, a04, a11, a23, a30, a34, a41, \
|
||||
a03, a10, a22, a21, a33, a40, a02, a14, a13, a20, a32, a44, a01
|
||||
#define P5 a00, a21, a42, a13, a34, a04, a20, a41, a12, a33, a03, a24, \
|
||||
a40, a11, a32, a02, a23, a44, a10, a31, a01, a22, a43, a14, a30
|
||||
#define P6 a00, a02, a04, a01, a03, a20, a22, a24, a21, a23, a40, a42, \
|
||||
a44, a41, a43, a10, a12, a14, a11, a13, a30, a32, a34, a31, a33
|
||||
#define P7 a00, a10, a20, a30, a40, a22, a32, a42, a02, a12, a44, a04, \
|
||||
a14, a24, a34, a11, a21, a31, a41, a01, a33, a43, a03, a13, a23
|
||||
#define P8 a00, a11, a22, a33, a44, a32, a43, a04, a10, a21, a14, a20, \
|
||||
a31, a42, a03, a41, a02, a13, a24, a30, a23, a34, a40, a01, a12
|
||||
#define P9 a00, a41, a32, a23, a14, a43, a34, a20, a11, a02, a31, a22, \
|
||||
a13, a04, a40, a24, a10, a01, a42, a33, a12, a03, a44, a30, a21
|
||||
#define P10 a00, a24, a43, a12, a31, a34, a03, a22, a41, a10, a13, a32, \
|
||||
a01, a20, a44, a42, a11, a30, a04, a23, a21, a40, a14, a33, a02
|
||||
#define P11 a00, a42, a34, a21, a13, a03, a40, a32, a24, a11, a01, a43, \
|
||||
a30, a22, a14, a04, a41, a33, a20, a12, a02, a44, a31, a23, a10
|
||||
#define P12 a00, a04, a03, a02, a01, a40, a44, a43, a42, a41, a30, a34, \
|
||||
a33, a32, a31, a20, a24, a23, a22, a21, a10, a14, a13, a12, a11
|
||||
#define P13 a00, a20, a40, a10, a30, a44, a14, a34, a04, a24, a33, a03, \
|
||||
a23, a43, a13, a22, a42, a12, a32, a02, a11, a31, a01, a21, a41
|
||||
#define P14 a00, a22, a44, a11, a33, a14, a31, a03, a20, a42, a23, a40, \
|
||||
a12, a34, a01, a32, a04, a21, a43, a10, a41, a13, a30, a02, a24
|
||||
#define P15 a00, a32, a14, a41, a23, a31, a13, a40, a22, a04, a12, a44, \
|
||||
a21, a03, a30, a43, a20, a02, a34, a11, a24, a01, a33, a10, a42
|
||||
#define P16 a00, a43, a31, a24, a12, a13, a01, a44, a32, a20, a21, a14, \
|
||||
a02, a40, a33, a34, a22, a10, a03, a41, a42, a30, a23, a11, a04
|
||||
#define P17 a00, a34, a13, a42, a21, a01, a30, a14, a43, a22, a02, a31, \
|
||||
a10, a44, a23, a03, a32, a11, a40, a24, a04, a33, a12, a41, a20
|
||||
#define P18 a00, a03, a01, a04, a02, a30, a33, a31, a34, a32, a10, a13, \
|
||||
a11, a14, a12, a40, a43, a41, a44, a42, a20, a23, a21, a24, a22
|
||||
#define P19 a00, a40, a30, a20, a10, a33, a23, a13, a03, a43, a11, a01, \
|
||||
a41, a31, a21, a44, a34, a24, a14, a04, a22, a12, a02, a42, a32
|
||||
#define P20 a00, a44, a33, a22, a11, a23, a12, a01, a40, a34, a41, a30, \
|
||||
a24, a13, a02, a14, a03, a42, a31, a20, a32, a21, a10, a04, a43
|
||||
#define P21 a00, a14, a23, a32, a41, a12, a21, a30, a44, a03, a24, a33, \
|
||||
a42, a01, a10, a31, a40, a04, a13, a22, a43, a02, a11, a20, a34
|
||||
#define P22 a00, a31, a12, a43, a24, a21, a02, a33, a14, a40, a42, a23, \
|
||||
a04, a30, a11, a13, a44, a20, a01, a32, a34, a10, a41, a22, a03
|
||||
#define P23 a00, a13, a21, a34, a42, a02, a10, a23, a31, a44, a04, a12, \
|
||||
a20, a33, a41, a01, a14, a22, a30, a43, a03, a11, a24, a32, a40
|
||||
|
||||
#define P8_TO_P0 do { \
|
||||
DECL64(t); \
|
||||
MOV64(t, a01); \
|
||||
MOV64(a01, a11); \
|
||||
MOV64(a11, a43); \
|
||||
MOV64(a43, t); \
|
||||
MOV64(t, a02); \
|
||||
MOV64(a02, a22); \
|
||||
MOV64(a22, a31); \
|
||||
MOV64(a31, t); \
|
||||
MOV64(t, a03); \
|
||||
MOV64(a03, a33); \
|
||||
MOV64(a33, a24); \
|
||||
MOV64(a24, t); \
|
||||
MOV64(t, a04); \
|
||||
MOV64(a04, a44); \
|
||||
MOV64(a44, a12); \
|
||||
MOV64(a12, t); \
|
||||
MOV64(t, a10); \
|
||||
MOV64(a10, a32); \
|
||||
MOV64(a32, a13); \
|
||||
MOV64(a13, t); \
|
||||
MOV64(t, a14); \
|
||||
MOV64(a14, a21); \
|
||||
MOV64(a21, a20); \
|
||||
MOV64(a20, t); \
|
||||
MOV64(t, a23); \
|
||||
MOV64(a23, a42); \
|
||||
MOV64(a42, a40); \
|
||||
MOV64(a40, t); \
|
||||
MOV64(t, a30); \
|
||||
MOV64(a30, a41); \
|
||||
MOV64(a41, a34); \
|
||||
MOV64(a34, t); \
|
||||
} while (0)
|
||||
|
||||
#define LPAR (
|
||||
#define RPAR )
|
||||
|
||||
#define KF_ELT(r, s, k) do { \
|
||||
THETA LPAR P ## r RPAR; \
|
||||
RHO LPAR P ## r RPAR; \
|
||||
KHI LPAR P ## s RPAR; \
|
||||
IOTA(k); \
|
||||
} while (0)
|
||||
|
||||
#define DO(x) x
|
||||
|
||||
#define KECCAK_F_1600 DO(KECCAK_F_1600_)
|
||||
|
||||
#define KECCAK_F_1600_ do { \
|
||||
int j; \
|
||||
for (j = 0; j < 24; j += 8) \
|
||||
{ \
|
||||
KF_ELT( 0, 1, (_mm256_set_epi64x( RC[j + 0], RC[j + 0], \
|
||||
RC[j + 0], RC[j + 0])) ); \
|
||||
KF_ELT( 1, 2, (_mm256_set_epi64x( RC[j + 1], RC[j + 1], \
|
||||
RC[j + 1], RC[j + 1])) ); \
|
||||
KF_ELT( 2, 3, (_mm256_set_epi64x( RC[j + 2], RC[j + 2], \
|
||||
RC[j + 2], RC[j + 2])) ); \
|
||||
KF_ELT( 3, 4, (_mm256_set_epi64x( RC[j + 3], RC[j + 3], \
|
||||
RC[j + 3], RC[j + 3])) ); \
|
||||
KF_ELT( 4, 5, (_mm256_set_epi64x( RC[j + 4], RC[j + 4], \
|
||||
RC[j + 4], RC[j + 4])) ); \
|
||||
KF_ELT( 5, 6, (_mm256_set_epi64x( RC[j + 5], RC[j + 5], \
|
||||
RC[j + 5], RC[j + 5])) ); \
|
||||
KF_ELT( 6, 7, (_mm256_set_epi64x( RC[j + 6], RC[j + 6], \
|
||||
RC[j + 6], RC[j + 6])) ); \
|
||||
KF_ELT( 7, 8, (_mm256_set_epi64x( RC[j + 7], RC[j + 7], \
|
||||
RC[j + 7], RC[j + 7])) ); \
|
||||
P8_TO_P0; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
static void keccak64_init( keccak64_ctx_m256i *kc, unsigned out_size )
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 25; i ++)
|
||||
kc->w[i] = _mm256_setzero_si256();
|
||||
|
||||
// Initialization for the "lane complement".
|
||||
kc->w[ 1] = m256_neg1;
|
||||
kc->w[ 2] = m256_neg1;
|
||||
kc->w[ 8] = m256_neg1;
|
||||
kc->w[12] = m256_neg1;
|
||||
kc->w[17] = m256_neg1;
|
||||
kc->w[20] = m256_neg1;
|
||||
kc->ptr = 0;
|
||||
kc->lim = 200 - (out_size >> 2);
|
||||
}
|
||||
|
||||
static void
|
||||
keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
|
||||
size_t lim )
|
||||
{
|
||||
__m256i *buf;
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
size_t ptr;
|
||||
DECL_STATE
|
||||
|
||||
buf = kc->buf;
|
||||
ptr = kc->ptr;
|
||||
|
||||
if ( len < (lim - ptr) )
|
||||
{
|
||||
memcpy_256( buf + (ptr>>3), vdata, len>>3 );
|
||||
kc->ptr = ptr + len;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE( kc );
|
||||
while ( len > 0 )
|
||||
{
|
||||
size_t clen;
|
||||
|
||||
clen = (lim - ptr);
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_256( buf + (ptr>>3), vdata, clen>>3 );
|
||||
ptr += clen;
|
||||
vdata = vdata + (clen>>3);
|
||||
len -= clen;
|
||||
if ( ptr == lim )
|
||||
{
|
||||
INPUT_BUF( lim );
|
||||
KECCAK_F_1600;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE( kc );
|
||||
kc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
|
||||
size_t lim )
|
||||
{
|
||||
unsigned eb;
|
||||
union {
|
||||
__m256i tmp[lim + 1];
|
||||
sph_u64 dummy; /* for alignment */
|
||||
} u;
|
||||
size_t j;
|
||||
size_t m256_len = byte_len >> 3;
|
||||
|
||||
eb = 0x100 >> 8;
|
||||
if ( kc->ptr == (lim - 8) )
|
||||
{
|
||||
uint64_t t = eb | 0x8000000000000000;
|
||||
u.tmp[0] = _mm256_set_epi64x( t, t, t, t );
|
||||
j = 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
j = lim - kc->ptr;
|
||||
u.tmp[0] = _mm256_set_epi64x( eb, eb, eb, eb );
|
||||
memset_zero_256( u.tmp + 1, (j>>3) - 2 );
|
||||
u.tmp[ (j>>3) - 1] = _mm256_set_epi64x( 0x8000000000000000,
|
||||
0x8000000000000000, 0x8000000000000000, 0x8000000000000000);
|
||||
}
|
||||
keccak64_core( kc, u.tmp, j, lim );
|
||||
/* Finalize the "lane complement" */
|
||||
NOT64( kc->w[ 1], kc->w[ 1] );
|
||||
NOT64( kc->w[ 2], kc->w[ 2] );
|
||||
NOT64( kc->w[ 8], kc->w[ 8] );
|
||||
NOT64( kc->w[12], kc->w[12] );
|
||||
NOT64( kc->w[17], kc->w[17] );
|
||||
NOT64( kc->w[20], kc->w[20] );
|
||||
for ( j = 0; j < m256_len; j++ )
|
||||
u.tmp[j] = kc->w[j];
|
||||
memcpy_256( dst, u.tmp, m256_len );
|
||||
}
|
||||
|
||||
void keccak256_4way_init( void *kc )
|
||||
{
|
||||
keccak64_init( kc, 256 );
|
||||
}
|
||||
|
||||
void
|
||||
keccak256_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
keccak64_core(cc, data, len, 136);
|
||||
}
|
||||
|
||||
void
|
||||
keccak256_4way_close(void *cc, void *dst)
|
||||
{
|
||||
keccak64_close(cc, dst, 32, 136);
|
||||
}
|
||||
|
||||
void keccak512_4way_init( void *kc )
|
||||
{
|
||||
keccak64_init( kc, 512 );
|
||||
}
|
||||
|
||||
void
|
||||
keccak512_4way(void *cc, const void *data, size_t len)
|
||||
{
|
||||
keccak64_core(cc, data, len, 72);
|
||||
}
|
||||
|
||||
void
|
||||
keccak512_4way_close(void *cc, void *dst)
|
||||
{
|
||||
keccak64_close(cc, dst, 64, 72);
|
||||
}
|
||||
|
||||
#endif
|
94
algo/keccak/keccak-hash-4way.h
Normal file
94
algo/keccak/keccak-hash-4way.h
Normal file
@@ -0,0 +1,94 @@
|
||||
/* $Id: sph_keccak.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* Keccak interface. This is the interface for Keccak with the
|
||||
* recommended parameters for SHA-3, with output lengths 224, 256,
|
||||
* 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_keccak.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef KECCAK_HASH_4WAY_H__
|
||||
#define KECCAK_HASH_4WAY_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
#define SPH_SIZE_keccak256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Keccak-512.
|
||||
*/
|
||||
#define SPH_SIZE_keccak512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Keccak computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once a
|
||||
* Keccak computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Keccak computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
__m256i buf[144*8]; /* first field, for alignment */
|
||||
__m256i w[25];
|
||||
size_t ptr, lim;
|
||||
// sph_u64 wide[25];
|
||||
} keccak64_ctx_m256i;
|
||||
|
||||
typedef keccak64_ctx_m256i keccak256_4way_context;
|
||||
typedef keccak64_ctx_m256i keccak512_4way_context;
|
||||
|
||||
void keccak256_4way_init(void *cc);
|
||||
void keccak256_4way(void *cc, const void *data, size_t len);
|
||||
void keccak256_4way_close(void *cc, void *dst);
|
||||
|
||||
|
||||
void keccak512_4way_init(void *cc);
|
||||
void keccak512_4way(void *cc, const void *data, size_t len);
|
||||
void keccak512_4way_close(void *cc, void *dst);
|
||||
void keccak512_4way_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -1,4 +1,3 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
@@ -51,17 +50,3 @@ int scanhash_keccak(int thr_id, struct work *work,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void keccak_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (128.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_keccak_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->scanhash = (void*)&scanhash_keccak;
|
||||
gate->hash = (void*)&keccakhash;
|
||||
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
|
||||
gate->set_target = (void*)&keccak_set_target;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -955,6 +955,7 @@ static const struct {
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
|
||||
DECL64(tt0); \
|
||||
DECL64(tt1); \
|
||||
@@ -1643,8 +1644,7 @@ keccak_core(sph_keccak_context *kc, const void *data, size_t len, size_t lim)
|
||||
for (j = 0; j < d; j += 8) \
|
||||
sph_enc64le_aligned(u.tmp + j, kc->u.wide[j >> 3]); \
|
||||
memcpy(dst, u.tmp, d); \
|
||||
keccak_init(kc, (unsigned)d << 3); \
|
||||
} \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
|
@@ -775,10 +775,8 @@ static const sph_u64 RC[] = {
|
||||
KF_ELT( 5, 6, RC[j + 5]); \
|
||||
KF_ELT( 6, 7, RC[j + 6]); \
|
||||
KF_ELT( 7, 8, RC[j + 7]); \
|
||||
*/
|
||||
|
||||
//kekDECL_STATE \
|
||||
|
||||
kekDECL_STATE \
|
||||
*/
|
||||
#define DECL_KEC
|
||||
|
||||
|
||||
|
260
algo/lbry.c
260
algo/lbry.c
@@ -1,260 +0,0 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "ripemd/sph_ripemd.h"
|
||||
#include "sha/sph_sha2.h"
|
||||
#if defined __SHA__
|
||||
#include <openssl/sha.h>
|
||||
#endif
|
||||
|
||||
#define LBRY_NTIME_INDEX 25
|
||||
#define LBRY_NBITS_INDEX 26
|
||||
#define LBRY_NONCE_INDEX 27
|
||||
#define LBRY_WORK_DATA_SIZE 192
|
||||
#define LBRY_WORK_CMP_SIZE 76 // same as default
|
||||
|
||||
/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */
|
||||
typedef struct {
|
||||
#if defined __SHA__
|
||||
SHA256_CTX sha256;
|
||||
#else
|
||||
sph_sha256_context sha256;
|
||||
#endif
|
||||
sph_sha512_context sha512;
|
||||
sph_ripemd160_context ripemd;
|
||||
} lbryhash_context_holder;
|
||||
|
||||
/* no need to copy, because close reinit the context */
|
||||
static lbryhash_context_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void init_lbry_contexts(void *dummy)
|
||||
{
|
||||
#if defined __SHA__
|
||||
SHA256_Init( &ctx.sha256 );
|
||||
#else
|
||||
sph_sha256_init( &ctx.sha256 );
|
||||
#endif
|
||||
sph_sha512_init( &ctx.sha512 );
|
||||
sph_ripemd160_init( &ctx.ripemd );
|
||||
}
|
||||
|
||||
void lbry_hash(void* output, const void* input)
|
||||
{
|
||||
#if defined __SHA__
|
||||
SHA256_CTX ctx_sha256 __attribute__ ((aligned (64)));
|
||||
#else
|
||||
sph_sha256_context ctx_sha256 __attribute__ ((aligned (64)));
|
||||
#endif
|
||||
sph_sha512_context ctx_sha512 __attribute__ ((aligned (64)));
|
||||
sph_ripemd160_context ctx_ripemd __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) hashA[16];
|
||||
uint32_t _ALIGN(64) hashB[16];
|
||||
uint32_t _ALIGN(64) hashC[16];
|
||||
|
||||
#if defined __SHA__
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, input, 112 );
|
||||
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, hashA, 32 );
|
||||
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
|
||||
#else
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256 ( &ctx_sha256, input, 112 );
|
||||
sph_sha256_close( &ctx_sha256, hashA );
|
||||
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256 ( &ctx_sha256, hashA, 32 );
|
||||
sph_sha256_close( &ctx_sha256, hashA );
|
||||
#endif
|
||||
|
||||
sph_sha512_init( &ctx_sha512 );
|
||||
sph_sha512 ( &ctx_sha512, hashA, 32 );
|
||||
sph_sha512_close( &ctx_sha512, hashA );
|
||||
|
||||
sph_ripemd160_init( &ctx_ripemd );
|
||||
sph_ripemd160 ( &ctx_ripemd, hashA, 32 );
|
||||
sph_ripemd160_close( &ctx_ripemd, hashB );
|
||||
|
||||
sph_ripemd160_init( &ctx_ripemd );
|
||||
sph_ripemd160 ( &ctx_ripemd, hashA+8, 32 );
|
||||
sph_ripemd160_close( &ctx_ripemd, hashC );
|
||||
|
||||
#if defined __SHA__
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, hashB, 20 );
|
||||
SHA256_Update( &ctx_sha256, hashC, 20 );
|
||||
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
|
||||
|
||||
SHA256_Init( &ctx_sha256 );
|
||||
SHA256_Update( &ctx_sha256, hashA, 32 );
|
||||
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
|
||||
#else
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256 ( &ctx_sha256, hashB, 20 );
|
||||
sph_sha256 ( &ctx_sha256, hashC, 20 );
|
||||
sph_sha256_close( &ctx_sha256, hashA );
|
||||
|
||||
sph_sha256_init( &ctx_sha256 );
|
||||
sph_sha256 ( &ctx_sha256, hashA, 32 );
|
||||
sph_sha256_close( &ctx_sha256, hashA );
|
||||
#endif
|
||||
memcpy( output, hashA, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[27] - 1;
|
||||
const uint32_t first_nonce = pdata[27];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t endiandata[32] __attribute__ ((aligned (64)));
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
0xF,
|
||||
0xFF,
|
||||
0xFFF,
|
||||
0xFFFF,
|
||||
0x10000000
|
||||
};
|
||||
uint32_t masks[] = {
|
||||
0xFFFFFFFF,
|
||||
0xFFFFFFF0,
|
||||
0xFFFFFF00,
|
||||
0xFFFFF000,
|
||||
0xFFFF0000,
|
||||
0
|
||||
};
|
||||
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 32 );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
printf("[%d] Htarg=%X\n", thr_id, Htarg);
|
||||
#endif
|
||||
for (int m=0; m < sizeof(masks); m++) {
|
||||
if (Htarg <= htmax[m]) {
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
pdata[27] = ++n;
|
||||
be32enc(&endiandata[27], n);
|
||||
lbry_hash(hash64, &endiandata);
|
||||
#ifndef DEBUG_ALGO
|
||||
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
if (!(n % 0x1000) && !thr_id) printf(".");
|
||||
if (!(hash64[7] & mask)) {
|
||||
printf("[%d]",thr_id);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
// see blake.c if else to understand the loop on htmax => mask
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[27] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
double lbry_calc_network_diff( struct work *work )
|
||||
{
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
|
||||
uint32_t nbits = swab32( work->data[ LBRY_NBITS_INDEX ] );
|
||||
uint32_t bits = (nbits & 0xffffff);
|
||||
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
|
||||
double d = (double)0x0000ffff / (double)bits;
|
||||
|
||||
for (int m=shift; m < 29; m++) d *= 256.0;
|
||||
for (int m=29; m < shift; m++) d /= 256.0;
|
||||
if (opt_debug_diff)
|
||||
applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits);
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
// std_le should work but it doesn't
|
||||
void lbry_le_build_stratum_request( char *req, struct work *work,
|
||||
struct stratum_ctx *sctx )
|
||||
{
|
||||
unsigned char *xnonce2str;
|
||||
uint32_t ntime, nonce;
|
||||
char ntimestr[9], noncestr[9];
|
||||
|
||||
le32enc( &ntime, work->data[ LBRY_NTIME_INDEX ] );
|
||||
le32enc( &nonce, work->data[ LBRY_NONCE_INDEX ] );
|
||||
bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) );
|
||||
bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) );
|
||||
xnonce2str = abin2hex( work->xnonce2, work->xnonce2_len);
|
||||
snprintf( req, JSON_BUF_LEN,
|
||||
"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free(xnonce2str);
|
||||
}
|
||||
|
||||
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
unsigned char merkle_root[64] = { 0 };
|
||||
size_t t;
|
||||
int i;
|
||||
|
||||
algo_gate.gen_merkle_root( merkle_root, sctx );
|
||||
// Increment extranonce2
|
||||
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
// Assemble block header
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = le32dec( sctx->job.version );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
g_work->data[17 + i] = ((uint32_t*)sctx->job.claim)[i];
|
||||
g_work->data[ LBRY_NTIME_INDEX ] = le32dec(sctx->job.ntime);
|
||||
g_work->data[ LBRY_NBITS_INDEX ] = le32dec(sctx->job.nbits);
|
||||
g_work->data[28] = 0x80000000;
|
||||
}
|
||||
|
||||
void lbry_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
int64_t lbry_get_max64() { return 0x1ffffLL; }
|
||||
|
||||
bool register_lbry_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_lbry;
|
||||
gate->hash = (void*)&lbry_hash;
|
||||
gate->calc_network_diff = (void*)&lbry_calc_network_diff;
|
||||
gate->get_max64 = (void*)&lbry_get_max64;
|
||||
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
|
||||
gate->build_extraheader = (void*)&lbry_build_extraheader;
|
||||
gate->set_target = (void*)&lbry_set_target;
|
||||
gate->ntime_index = LBRY_NTIME_INDEX;
|
||||
gate->nbits_index = LBRY_NBITS_INDEX;
|
||||
gate->nonce_index = LBRY_NONCE_INDEX;
|
||||
gate->work_data_size = LBRY_WORK_DATA_SIZE;
|
||||
return true;
|
||||
}
|
||||
|
583
algo/luffa/luffa-hash-2way.c
Normal file
583
algo/luffa/luffa-hash-2way.c
Normal file
@@ -0,0 +1,583 @@
|
||||
/*
|
||||
* luffa_for_sse2.c
|
||||
* Version 2.0 (Sep 15th 2009)
|
||||
*
|
||||
* Copyright (C) 2008-2009 Hitachi, Ltd. All rights reserved.
|
||||
*
|
||||
* Hitachi, Ltd. is the owner of this software and hereby grant
|
||||
* the U.S. Government and any interested party the right to use
|
||||
* this software for the purposes of the SHA-3 evaluation process,
|
||||
* notwithstanding that this software is copyrighted.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <immintrin.h>
|
||||
#include "luffa-hash-2way.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#include "avxdefs.h"
|
||||
|
||||
#define MASK _mm256_set_epi32( 0UL, 0UL, 0UL, 0xffffffffUL, \
|
||||
0UL, 0UL, 0UL, 0xffffffffUL )
|
||||
|
||||
#define ADD_CONSTANT(a,b,c0,c1)\
|
||||
a = _mm256_xor_si256(a,c0);\
|
||||
b = _mm256_xor_si256(b,c1);\
|
||||
|
||||
#define MULT2(a0,a1) \
|
||||
do { \
|
||||
register __m256i b = _mm256_xor_si256( a0, \
|
||||
_mm256_shuffle_epi32( _mm256_and_si256(a1,MASK), 16 ) ); \
|
||||
a0 = _mm256_or_si256( _mm256_srli_si256(b,4), _mm256_slli_si256(a1,12) ); \
|
||||
a1 = _mm256_or_si256( _mm256_srli_si256(a1,4), _mm256_slli_si256(b,12) ); \
|
||||
} while(0)
|
||||
|
||||
// confirm pointer arithmetic
|
||||
// ok but use array indexes
|
||||
#define STEP_PART(x,c,t)\
|
||||
SUBCRUMB(*x,*(x+1),*(x+2),*(x+3),*t);\
|
||||
SUBCRUMB(*(x+5),*(x+6),*(x+7),*(x+4),*t);\
|
||||
MIXWORD(*x,*(x+4),*t,*(t+1));\
|
||||
MIXWORD(*(x+1),*(x+5),*t,*(t+1));\
|
||||
MIXWORD(*(x+2),*(x+6),*t,*(t+1));\
|
||||
MIXWORD(*(x+3),*(x+7),*t,*(t+1));\
|
||||
ADD_CONSTANT(*x, *(x+4), *c, *(c+1));
|
||||
|
||||
#define SUBCRUMB(a0,a1,a2,a3,t)\
|
||||
t = _mm256_load_si256(&a0);\
|
||||
a0 = _mm256_or_si256(a0,a1);\
|
||||
a2 = _mm256_xor_si256(a2,a3);\
|
||||
a1 = _mm256_andnot_si256(a1, m256_neg1 );\
|
||||
a0 = _mm256_xor_si256(a0,a3);\
|
||||
a3 = _mm256_and_si256(a3,t);\
|
||||
a1 = _mm256_xor_si256(a1,a3);\
|
||||
a3 = _mm256_xor_si256(a3,a2);\
|
||||
a2 = _mm256_and_si256(a2,a0);\
|
||||
a0 = _mm256_andnot_si256(a0, m256_neg1 );\
|
||||
a2 = _mm256_xor_si256(a2,a1);\
|
||||
a1 = _mm256_or_si256(a1,a3);\
|
||||
t = _mm256_xor_si256(t,a1);\
|
||||
a3 = _mm256_xor_si256(a3,a2);\
|
||||
a2 = _mm256_and_si256(a2,a1);\
|
||||
a1 = _mm256_xor_si256(a1,a0);\
|
||||
a0 = _mm256_load_si256(&t);\
|
||||
|
||||
#define MIXWORD(a,b,t1,t2)\
|
||||
b = _mm256_xor_si256(a,b);\
|
||||
t1 = _mm256_slli_epi32(a,2);\
|
||||
t2 = _mm256_srli_epi32(a,30);\
|
||||
a = _mm256_or_si256(t1,t2);\
|
||||
a = _mm256_xor_si256(a,b);\
|
||||
t1 = _mm256_slli_epi32(b,14);\
|
||||
t2 = _mm256_srli_epi32(b,18);\
|
||||
b = _mm256_or_si256(t1,t2);\
|
||||
b = _mm256_xor_si256(a,b);\
|
||||
t1 = _mm256_slli_epi32(a,10);\
|
||||
t2 = _mm256_srli_epi32(a,22);\
|
||||
a = _mm256_or_si256(t1,t2);\
|
||||
a = _mm256_xor_si256(a,b);\
|
||||
t1 = _mm256_slli_epi32(b,1);\
|
||||
t2 = _mm256_srli_epi32(b,31);\
|
||||
b = _mm256_or_si256(t1,t2);
|
||||
|
||||
#define STEP_PART2(a0,a1,t0,t1,c0,c1,tmp0,tmp1)\
|
||||
a1 = _mm256_shuffle_epi32(a1,147);\
|
||||
t0 = _mm256_load_si256(&a1);\
|
||||
a1 = _mm256_unpacklo_epi32(a1,a0);\
|
||||
t0 = _mm256_unpackhi_epi32(t0,a0);\
|
||||
t1 = _mm256_shuffle_epi32(t0,78);\
|
||||
a0 = _mm256_shuffle_epi32(a1,78);\
|
||||
SUBCRUMB(t1,t0,a0,a1,tmp0);\
|
||||
t0 = _mm256_unpacklo_epi32(t0,t1);\
|
||||
a1 = _mm256_unpacklo_epi32(a1,a0);\
|
||||
a0 = _mm256_load_si256(&a1);\
|
||||
a0 = _mm256_unpackhi_epi64(a0,t0);\
|
||||
a1 = _mm256_unpacklo_epi64(a1,t0);\
|
||||
a1 = _mm256_shuffle_epi32(a1,57);\
|
||||
MIXWORD(a0,a1,tmp0,tmp1);\
|
||||
ADD_CONSTANT(a0,a1,c0,c1);
|
||||
|
||||
#define NMLTOM768(r0,r1,r2,s0,s1,s2,s3,p0,p1,p2,q0,q1,q2,q3)\
|
||||
s2 = _mm256_load_si256(&r1);\
|
||||
q2 = _mm256_load_si256(&p1);\
|
||||
r2 = _mm256_shuffle_epi32(r2,216);\
|
||||
p2 = _mm256_shuffle_epi32(p2,216);\
|
||||
r1 = _mm256_unpacklo_epi32(r1,r0);\
|
||||
p1 = _mm256_unpacklo_epi32(p1,p0);\
|
||||
s2 = _mm256_unpackhi_epi32(s2,r0);\
|
||||
q2 = _mm256_unpackhi_epi32(q2,p0);\
|
||||
s0 = _mm256_load_si256(&r2);\
|
||||
q0 = _mm256_load_si256(&p2);\
|
||||
r2 = _mm256_unpacklo_epi64(r2,r1);\
|
||||
p2 = _mm256_unpacklo_epi64(p2,p1);\
|
||||
s1 = _mm256_load_si256(&s0);\
|
||||
q1 = _mm256_load_si256(&q0);\
|
||||
s0 = _mm256_unpackhi_epi64(s0,r1);\
|
||||
q0 = _mm256_unpackhi_epi64(q0,p1);\
|
||||
r2 = _mm256_shuffle_epi32(r2,225);\
|
||||
p2 = _mm256_shuffle_epi32(p2,225);\
|
||||
r0 = _mm256_load_si256(&s1);\
|
||||
p0 = _mm256_load_si256(&q1);\
|
||||
s0 = _mm256_shuffle_epi32(s0,225);\
|
||||
q0 = _mm256_shuffle_epi32(q0,225);\
|
||||
s1 = _mm256_unpacklo_epi64(s1,s2);\
|
||||
q1 = _mm256_unpacklo_epi64(q1,q2);\
|
||||
r0 = _mm256_unpackhi_epi64(r0,s2);\
|
||||
p0 = _mm256_unpackhi_epi64(p0,q2);\
|
||||
s2 = _mm256_load_si256(&r0);\
|
||||
q2 = _mm256_load_si256(&p0);\
|
||||
s3 = _mm256_load_si256(&r2);\
|
||||
q3 = _mm256_load_si256(&p2);\
|
||||
|
||||
#define MIXTON768(r0,r1,r2,r3,s0,s1,s2,p0,p1,p2,p3,q0,q1,q2)\
|
||||
s0 = _mm256_load_si256(&r0);\
|
||||
q0 = _mm256_load_si256(&p0);\
|
||||
s1 = _mm256_load_si256(&r2);\
|
||||
q1 = _mm256_load_si256(&p2);\
|
||||
r0 = _mm256_unpackhi_epi32(r0,r1);\
|
||||
p0 = _mm256_unpackhi_epi32(p0,p1);\
|
||||
r2 = _mm256_unpackhi_epi32(r2,r3);\
|
||||
p2 = _mm256_unpackhi_epi32(p2,p3);\
|
||||
s0 = _mm256_unpacklo_epi32(s0,r1);\
|
||||
q0 = _mm256_unpacklo_epi32(q0,p1);\
|
||||
s1 = _mm256_unpacklo_epi32(s1,r3);\
|
||||
q1 = _mm256_unpacklo_epi32(q1,p3);\
|
||||
r1 = _mm256_load_si256(&r0);\
|
||||
p1 = _mm256_load_si256(&p0);\
|
||||
r0 = _mm256_unpackhi_epi64(r0,r2);\
|
||||
p0 = _mm256_unpackhi_epi64(p0,p2);\
|
||||
s0 = _mm256_unpackhi_epi64(s0,s1);\
|
||||
q0 = _mm256_unpackhi_epi64(q0,q1);\
|
||||
r1 = _mm256_unpacklo_epi64(r1,r2);\
|
||||
p1 = _mm256_unpacklo_epi64(p1,p2);\
|
||||
s2 = _mm256_load_si256(&r0);\
|
||||
q2 = _mm256_load_si256(&p0);\
|
||||
s1 = _mm256_load_si256(&r1);\
|
||||
q1 = _mm256_load_si256(&p1);\
|
||||
|
||||
#define NMLTOM1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3)\
|
||||
s1 = _mm256_load_si256(&r3);\
|
||||
q1 = _mm256_load_si256(&p3);\
|
||||
s3 = _mm256_load_si256(&r3);\
|
||||
q3 = _mm256_load_si256(&p3);\
|
||||
s1 = _mm256_unpackhi_epi32(s1,r2);\
|
||||
q1 = _mm256_unpackhi_epi32(q1,p2);\
|
||||
s3 = _mm256_unpacklo_epi32(s3,r2);\
|
||||
q3 = _mm256_unpacklo_epi32(q3,p2);\
|
||||
s0 = _mm256_load_si256(&s1);\
|
||||
q0 = _mm256_load_si256(&q1);\
|
||||
s2 = _mm256_load_si256(&s3);\
|
||||
q2 = _mm256_load_si256(&q3);\
|
||||
r3 = _mm256_load_si256(&r1);\
|
||||
p3 = _mm256_load_si256(&p1);\
|
||||
r1 = _mm256_unpacklo_epi32(r1,r0);\
|
||||
p1 = _mm256_unpacklo_epi32(p1,p0);\
|
||||
r3 = _mm256_unpackhi_epi32(r3,r0);\
|
||||
p3 = _mm256_unpackhi_epi32(p3,p0);\
|
||||
s0 = _mm256_unpackhi_epi64(s0,r3);\
|
||||
q0 = _mm256_unpackhi_epi64(q0,p3);\
|
||||
s1 = _mm256_unpacklo_epi64(s1,r3);\
|
||||
q1 = _mm256_unpacklo_epi64(q1,p3);\
|
||||
s2 = _mm256_unpackhi_epi64(s2,r1);\
|
||||
q2 = _mm256_unpackhi_epi64(q2,p1);\
|
||||
s3 = _mm256_unpacklo_epi64(s3,r1);\
|
||||
q3 = _mm256_unpacklo_epi64(q3,p1);
|
||||
|
||||
#define MIXTON1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3)\
|
||||
NMLTOM1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3);
|
||||
|
||||
/* initial values of chaining variables */
|
||||
static const uint32 IV[40] __attribute((aligned(32))) = {
|
||||
0xdbf78465,0x4eaa6fb4,0x44b051e0,0x6d251e69,
|
||||
0xdef610bb,0xee058139,0x90152df4,0x6e292011,
|
||||
0xde099fa3,0x70eee9a0,0xd9d2f256,0xc3b44b95,
|
||||
0x746cd581,0xcf1ccf0e,0x8fc944b3,0x5d9b0557,
|
||||
0xad659c05,0x04016ce5,0x5dba5781,0xf7efc89d,
|
||||
0x8b264ae7,0x24aa230a,0x666d1836,0x0306194f,
|
||||
0x204b1f67,0xe571f7d7,0x36d79cce,0x858075d5,
|
||||
0x7cde72ce,0x14bcb808,0x57e9e923,0x35870c6a,
|
||||
0xaffb4363,0xc825b7c7,0x5ec41e22,0x6c68e9be,
|
||||
0x03e86cea,0xb07224cc,0x0fc688f1,0xf5df3999
|
||||
};
|
||||
|
||||
/* Round Constants */
|
||||
static const uint32 CNS_INIT[128] __attribute((aligned(32))) = {
|
||||
0xb213afa5,0xfc20d9d2,0xb6de10ed,0x303994a6,
|
||||
0xe028c9bf,0xe25e72c1,0x01685f3d,0xe0337818,
|
||||
0xc84ebe95,0x34552e25,0x70f47aae,0xc0e65299,
|
||||
0x44756f91,0xe623bb72,0x05a17cf4,0x441ba90d,
|
||||
0x4e608a22,0x7ad8818f,0x0707a3d4,0x6cc33a12,
|
||||
0x7e8fce32,0x5c58a4a4,0xbd09caca,0x7f34d442,
|
||||
0x56d858fe,0x8438764a,0x1c1e8f51,0xdc56983e,
|
||||
0x956548be,0x1e38e2e7,0xf4272b28,0x9389217f,
|
||||
0x343b138f,0xbb6de032,0x707a3d45,0x1e00108f,
|
||||
0xfe191be2,0x78e38b9d,0x144ae5cc,0xe5a8bce6,
|
||||
0xd0ec4e3d,0xedb780c8,0xaeb28562,0x7800423d,
|
||||
0x3cb226e5,0x27586719,0xfaa7ae2b,0x5274baf4,
|
||||
0x2ceb4882,0xd9847356,0xbaca1589,0x8f5b7882,
|
||||
0x5944a28e,0x36eda57f,0x2e48f1c1,0x26889ba7,
|
||||
0xb3ad2208,0xa2c78434,0x40a46f3e,0x96e1db12,
|
||||
0xa1c4c355,0x703aace7,0xb923c704,0x9a226e9d,
|
||||
0x00000000,0x00000000,0x00000000,0xf0d2e9e3,
|
||||
0x00000000,0x00000000,0x00000000,0x5090d577,
|
||||
0x00000000,0x00000000,0x00000000,0xac11d7fa,
|
||||
0x00000000,0x00000000,0x00000000,0x2d1925ab,
|
||||
0x00000000,0x00000000,0x00000000,0x1bcb66f2,
|
||||
0x00000000,0x00000000,0x00000000,0xb46496ac,
|
||||
0x00000000,0x00000000,0x00000000,0x6f2d9bc9,
|
||||
0x00000000,0x00000000,0x00000000,0xd1925ab0,
|
||||
0x00000000,0x00000000,0x00000000,0x78602649,
|
||||
0x00000000,0x00000000,0x00000000,0x29131ab6,
|
||||
0x00000000,0x00000000,0x00000000,0x8edae952,
|
||||
0x00000000,0x00000000,0x00000000,0x0fc053c3,
|
||||
0x00000000,0x00000000,0x00000000,0x3b6ba548,
|
||||
0x00000000,0x00000000,0x00000000,0x3f014f0c,
|
||||
0x00000000,0x00000000,0x00000000,0xedae9520,
|
||||
0x00000000,0x00000000,0x00000000,0xfc053c31
|
||||
};
|
||||
|
||||
__m256i CNS[32];
|
||||
|
||||
/***************************************************/
|
||||
/* Round function */
|
||||
/* state: hash context */
|
||||
|
||||
void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
{
|
||||
__m256i t0, t1;
|
||||
__m256i *chainv = state->chainv;
|
||||
__m256i msg0, msg1;
|
||||
__m256i tmp[2];
|
||||
__m256i x[8];
|
||||
|
||||
t0 = chainv[0];
|
||||
t1 = chainv[1];
|
||||
|
||||
t0 = _mm256_xor_si256( t0, chainv[2] );
|
||||
t1 = _mm256_xor_si256( t1, chainv[3] );
|
||||
t0 = _mm256_xor_si256( t0, chainv[4] );
|
||||
t1 = _mm256_xor_si256( t1, chainv[5] );
|
||||
t0 = _mm256_xor_si256( t0, chainv[6] );
|
||||
t1 = _mm256_xor_si256( t1, chainv[7] );
|
||||
t0 = _mm256_xor_si256( t0, chainv[8] );
|
||||
t1 = _mm256_xor_si256( t1, chainv[9] );
|
||||
|
||||
MULT2( t0, t1 );
|
||||
|
||||
msg0 = _mm256_shuffle_epi32( msg[0], 27 );
|
||||
msg1 = _mm256_shuffle_epi32( msg[1], 27 );
|
||||
|
||||
chainv[0] = _mm256_xor_si256( chainv[0], t0 );
|
||||
chainv[1] = _mm256_xor_si256( chainv[1], t1 );
|
||||
chainv[2] = _mm256_xor_si256( chainv[2], t0 );
|
||||
chainv[3] = _mm256_xor_si256( chainv[3], t1 );
|
||||
chainv[4] = _mm256_xor_si256( chainv[4], t0 );
|
||||
chainv[5] = _mm256_xor_si256( chainv[5], t1 );
|
||||
chainv[6] = _mm256_xor_si256( chainv[6], t0 );
|
||||
chainv[7] = _mm256_xor_si256( chainv[7], t1 );
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], t0 );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], t1 );
|
||||
|
||||
t0 = chainv[0];
|
||||
t1 = chainv[1];
|
||||
|
||||
MULT2( chainv[0], chainv[1]);
|
||||
chainv[0] = _mm256_xor_si256( chainv[0], chainv[2] );
|
||||
chainv[1] = _mm256_xor_si256( chainv[1], chainv[3] );
|
||||
|
||||
MULT2( chainv[2], chainv[3]);
|
||||
chainv[2] = _mm256_xor_si256(chainv[2], chainv[4]);
|
||||
chainv[3] = _mm256_xor_si256(chainv[3], chainv[5]);
|
||||
|
||||
MULT2( chainv[4], chainv[5]);
|
||||
chainv[4] = _mm256_xor_si256(chainv[4], chainv[6]);
|
||||
chainv[5] = _mm256_xor_si256(chainv[5], chainv[7]);
|
||||
|
||||
MULT2( chainv[6], chainv[7]);
|
||||
chainv[6] = _mm256_xor_si256(chainv[6], chainv[8]);
|
||||
chainv[7] = _mm256_xor_si256(chainv[7], chainv[9]);
|
||||
|
||||
MULT2( chainv[8], chainv[9]);
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], t0 );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], t1 );
|
||||
|
||||
t0 = chainv[8];
|
||||
t1 = chainv[9];
|
||||
|
||||
MULT2( chainv[8], chainv[9]);
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], chainv[6] );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], chainv[7] );
|
||||
|
||||
MULT2( chainv[6], chainv[7]);
|
||||
chainv[6] = _mm256_xor_si256( chainv[6], chainv[4] );
|
||||
chainv[7] = _mm256_xor_si256( chainv[7], chainv[5] );
|
||||
|
||||
MULT2( chainv[4], chainv[5]);
|
||||
chainv[4] = _mm256_xor_si256( chainv[4], chainv[2] );
|
||||
chainv[5] = _mm256_xor_si256( chainv[5], chainv[3] );
|
||||
|
||||
MULT2( chainv[2], chainv[3] );
|
||||
chainv[2] = _mm256_xor_si256( chainv[2], chainv[0] );
|
||||
chainv[3] = _mm256_xor_si256( chainv[3], chainv[1] );
|
||||
|
||||
MULT2( chainv[0], chainv[1] );
|
||||
chainv[0] = _mm256_xor_si256( _mm256_xor_si256( chainv[0], t0 ), msg0 );
|
||||
chainv[1] = _mm256_xor_si256( _mm256_xor_si256( chainv[1], t1 ), msg1 );
|
||||
|
||||
MULT2( msg0, msg1);
|
||||
chainv[2] = _mm256_xor_si256( chainv[2], msg0 );
|
||||
chainv[3] = _mm256_xor_si256( chainv[3], msg1 );
|
||||
|
||||
MULT2( msg0, msg1);
|
||||
chainv[4] = _mm256_xor_si256( chainv[4], msg0 );
|
||||
chainv[5] = _mm256_xor_si256( chainv[5], msg1 );
|
||||
|
||||
MULT2( msg0, msg1);
|
||||
chainv[6] = _mm256_xor_si256( chainv[6], msg0 );
|
||||
chainv[7] = _mm256_xor_si256( chainv[7], msg1 );
|
||||
|
||||
MULT2( msg0, msg1);
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], msg0 );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], msg1 );
|
||||
|
||||
MULT2( msg0, msg1);
|
||||
|
||||
chainv[3] = _mm256_or_si256( _mm256_slli_epi32( chainv[3], 1 ),
|
||||
_mm256_srli_epi32( chainv[3], 31 ) );
|
||||
chainv[5] = _mm256_or_si256( _mm256_slli_epi32( chainv[5], 2 ),
|
||||
_mm256_srli_epi32( chainv[5], 30 ) );
|
||||
chainv[7] = _mm256_or_si256( _mm256_slli_epi32( chainv[7], 3 ),
|
||||
_mm256_srli_epi32( chainv[7], 29 ) );
|
||||
chainv[9] = _mm256_or_si256( _mm256_slli_epi32( chainv[9], 4 ),
|
||||
_mm256_srli_epi32( chainv[9], 28 ) );
|
||||
|
||||
NMLTOM1024( chainv[0], chainv[2], chainv[4], chainv[6],
|
||||
x[0], x[1], x[2], x[3],
|
||||
chainv[1],chainv[3],chainv[5],chainv[7],
|
||||
x[4], x[5], x[6], x[7] );
|
||||
|
||||
STEP_PART( &x[0], &CNS[ 0], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[ 2], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[ 4], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[ 6], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[ 8], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[10], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[12], &tmp[0] );
|
||||
STEP_PART( &x[0], &CNS[14], &tmp[0] );
|
||||
|
||||
MIXTON1024( x[0], x[1], x[2], x[3],
|
||||
chainv[0], chainv[2], chainv[4],chainv[6],
|
||||
x[4], x[5], x[6], x[7],
|
||||
chainv[1],chainv[3],chainv[5],chainv[7]);
|
||||
|
||||
/* Process last 256-bit block */
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[16], CNS[17],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[18], CNS[19],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[20], CNS[21],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[22], CNS[23],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[24], CNS[25],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[26], CNS[27],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[28], CNS[29],
|
||||
tmp[0], tmp[1] );
|
||||
STEP_PART2( chainv[8], chainv[9], t0, t1, CNS[30], CNS[31],
|
||||
tmp[0], tmp[1] );
|
||||
}
|
||||
|
||||
|
||||
/***************************************************/
|
||||
/* Finalization function */
|
||||
/* state: hash context */
|
||||
/* b[8]: hash values */
|
||||
|
||||
void finalization512_2way( luffa_2way_context *state, uint32 *b )
|
||||
{
|
||||
uint32 hash[8] __attribute((aligned(64)));
|
||||
__m256i* chainv = state->chainv;
|
||||
__m256i t[2];
|
||||
__m256i zero[2];
|
||||
zero[0] = zero[1] = _mm256_setzero_si256();
|
||||
|
||||
/*---- blank round with m=0 ----*/
|
||||
rnd512_2way( state, zero );
|
||||
|
||||
t[0] = chainv[0];
|
||||
t[1] = chainv[1];
|
||||
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[2] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[3] );
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[4] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[5] );
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[6] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[7] );
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[8] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[9] );
|
||||
|
||||
t[0] = _mm256_shuffle_epi32( t[0], 27 );
|
||||
t[1] = _mm256_shuffle_epi32( t[1], 27 );
|
||||
|
||||
_mm256_store_si256( (__m256i*)&hash[0], t[0] );
|
||||
_mm256_store_si256( (__m256i*)&hash[8], t[1] );
|
||||
|
||||
casti_m256i( b, 0 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 1 ) = mm256_bswap_32( casti_m256i( hash, 1 ) );
|
||||
|
||||
rnd512_2way( state, zero );
|
||||
|
||||
t[0] = chainv[0];
|
||||
t[1] = chainv[1];
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[2] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[3] );
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[4] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[5] );
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[6] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[7] );
|
||||
t[0] = _mm256_xor_si256( t[0], chainv[8] );
|
||||
t[1] = _mm256_xor_si256( t[1], chainv[9] );
|
||||
|
||||
t[0] = _mm256_shuffle_epi32( t[0], 27 );
|
||||
t[1] = _mm256_shuffle_epi32( t[1], 27 );
|
||||
|
||||
_mm256_store_si256( (__m256i*)&hash[0], t[0] );
|
||||
_mm256_store_si256( (__m256i*)&hash[8], t[1] );
|
||||
|
||||
casti_m256i( b, 2 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 3 ) = mm256_bswap_32( casti_m256i( hash, 1 ) );
|
||||
}
|
||||
|
||||
int luffa_2way_init( luffa_2way_context *state, int hashbitlen )
|
||||
{
|
||||
int i;
|
||||
state->hashbitlen = hashbitlen;
|
||||
|
||||
for ( i=0; i<32; i++ ) CNS[i] =
|
||||
_mm256_set_epi32( CNS_INIT[ (i<<2) + 3 ], CNS_INIT[ (i<<2) +2 ],
|
||||
CNS_INIT[ (i<<2) + 1 ], CNS_INIT[ (i<<2) ],
|
||||
CNS_INIT[ (i<<2) + 3 ], CNS_INIT[ (i<<2) +2 ],
|
||||
CNS_INIT[ (i<<2) + 1 ], CNS_INIT[ (i<<2) ] );
|
||||
|
||||
for ( i=0; i<10; i++ ) state->chainv[i] =
|
||||
_mm256_set_epi32( IV[ (i<<2) +3 ], IV[ (i<<2) +2 ],
|
||||
IV[ (i<<2) +1 ], IV[ (i<<2) ],
|
||||
IV[ (i<<2) +3 ], IV[ (i<<2) +2 ],
|
||||
IV[ (i<<2) +1 ], IV[ (i<<2) ] );
|
||||
|
||||
((__m256i*)state->buffer)[0] = m256_zero;
|
||||
((__m256i*)state->buffer)[1] = m256_zero;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Do not call luffa_update_close after having called luffa_update.
|
||||
// Once luffa_update has been called only call luffa_update or luffa_close.
|
||||
int luffa_2way_update( luffa_2way_context *state, const void *data,
|
||||
size_t len )
|
||||
{
|
||||
__m256i *vdata = (__m256i*)data;
|
||||
__m256i *buffer = (__m256i*)state->buffer;
|
||||
__m256i msg[2];
|
||||
int i;
|
||||
int blocks = (int)len >> 5;
|
||||
state-> rembytes = (int)len & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
{
|
||||
msg[0] = mm256_bswap_32( vdata[ 0] );
|
||||
msg[1] = mm256_bswap_32( vdata[ 1 ] );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
// 16 byte partial block exists for 80 byte len
|
||||
// store in buffer for transform in final for midstate to work
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// remaining data bytes
|
||||
buffer[0] = mm256_bswap_32( vdata[0] );
|
||||
buffer[1] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int luffa_2way_close( luffa_2way_context *state, void *hashval )
|
||||
{
|
||||
__m256i *buffer = (__m256i*)state->buffer;
|
||||
__m256i msg[2];
|
||||
|
||||
// transform pad block
|
||||
if ( state->rembytes )
|
||||
// not empty, data is in buffer
|
||||
rnd512_2way( state, buffer );
|
||||
else
|
||||
{ // empty pad block, constant data
|
||||
msg[0] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
msg[1] = m256_zero;
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
finalization512_2way( state, (uint32*)hashval );
|
||||
|
||||
if ( state->hashbitlen > 512 )
|
||||
finalization512_2way( state, (uint32*)( hashval+32 ) );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int luffa_2way_update_close( luffa_2way_context *state,
|
||||
void *output, const void *data, size_t inlen )
|
||||
{
|
||||
// Optimized for integrals of 16 bytes, good for 64 and 80 byte len
|
||||
const __m256i *vdata = (__m256i*)data;
|
||||
__m256i msg[2];
|
||||
int i;
|
||||
const int blocks = (int)( inlen >> 5 );
|
||||
state->rembytes = inlen & 0x1F;
|
||||
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++, vdata+=2 )
|
||||
{
|
||||
msg[0] = mm256_bswap_32( vdata[ 0 ] );
|
||||
msg[1] = mm256_bswap_32( vdata[ 1 ] );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
// 16 byte partial block exists for 80 byte len
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// padding of partial block
|
||||
msg[0] = mm256_bswap_32( vdata[0] );
|
||||
msg[1] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
else
|
||||
{
|
||||
// empty pad block
|
||||
msg[0] = _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0,
|
||||
0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
msg[1] = m256_zero;
|
||||
rnd512_2way( state, msg );
|
||||
}
|
||||
|
||||
finalization512_2way( state, (uint32*)output );
|
||||
if ( state->hashbitlen > 512 )
|
||||
finalization512_2way( state, (uint32*)( output+32 ) );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
69
algo/luffa/luffa-hash-2way.h
Normal file
69
algo/luffa/luffa-hash-2way.h
Normal file
@@ -0,0 +1,69 @@
|
||||
#if !defined(LUFFA_HASH_2WAY_H__)
|
||||
#define LUFFA_HASH_2WAY_H__ 1
|
||||
/*
|
||||
* luffa_for_sse2.h
|
||||
* Version 2.0 (Sep 15th 2009)
|
||||
*
|
||||
* Copyright (C) 2008-2009 Hitachi, Ltd. All rights reserved.
|
||||
*
|
||||
* Hitachi, Ltd. is the owner of this software and hereby grant
|
||||
* the U.S. Government and any interested party the right to use
|
||||
* this software for the purposes of the SHA-3 evaluation process,
|
||||
* notwithstanding that this software is copyrighted.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#include <immintrin.h>
|
||||
#include "algo/sha/sha3-defs.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
/* The length of digests*/
|
||||
#define DIGEST_BIT_LEN_224 224
|
||||
#define DIGEST_BIT_LEN_256 256
|
||||
#define DIGEST_BIT_LEN_384 384
|
||||
#define DIGEST_BIT_LEN_512 512
|
||||
|
||||
/*********************************/
|
||||
/* The parameters of Luffa */
|
||||
#define MSG_BLOCK_BIT_LEN 256 /*The bit length of a message block*/
|
||||
#define MSG_BLOCK_BYTE_LEN (MSG_BLOCK_BIT_LEN >> 3) /* The byte length
|
||||
* of a message block*/
|
||||
|
||||
/* The number of blocks in Luffa */
|
||||
#define WIDTH_224 3
|
||||
#define WIDTH_256 3
|
||||
#define WIDTH_384 4
|
||||
#define WIDTH_512 5
|
||||
|
||||
/* The limit of the length of message */
|
||||
#define LIMIT_224 64
|
||||
#define LIMIT_256 64
|
||||
#define LIMIT_384 128
|
||||
#define LIMIT_512 128
|
||||
/*********************************/
|
||||
|
||||
typedef struct {
|
||||
uint32 buffer[8*2] __attribute((aligned(64)));
|
||||
__m256i chainv[10] __attribute((aligned(32))); /* Chaining values */
|
||||
int hashbitlen;
|
||||
int rembytes;
|
||||
} luffa_2way_context;
|
||||
|
||||
int luffa_2way_init( luffa_2way_context *state, int hashbitlen );
|
||||
int luffa_2way_update( luffa_2way_context *state, const void *data,
|
||||
size_t len );
|
||||
int luffa_2way_close( luffa_2way_context *state, void *hashval );
|
||||
int luffa_2way_update_close( luffa_2way_context *state, void *output,
|
||||
const void *data, size_t inlen );
|
||||
|
||||
#endif
|
||||
#endif
|
@@ -1,4 +1,3 @@
|
||||
#include "miner.h"
|
||||
#include "algo-gate-api.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
@@ -272,8 +272,8 @@ HashReturn update_luffa( hashState_luffa *state, const BitSequence *data,
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
{
|
||||
rnd512( state, mm_byteswap_epi32( casti_m128i( data, 1 ) ),
|
||||
mm_byteswap_epi32( casti_m128i( data, 0 ) ) );
|
||||
rnd512( state, mm_bswap_32( casti_m128i( data, 1 ) ),
|
||||
mm_bswap_32( casti_m128i( data, 0 ) ) );
|
||||
data += MSG_BLOCK_BYTE_LEN;
|
||||
}
|
||||
|
||||
@@ -282,7 +282,7 @@ HashReturn update_luffa( hashState_luffa *state, const BitSequence *data,
|
||||
if ( state->rembytes )
|
||||
{
|
||||
// remaining data bytes
|
||||
casti_m128i( state->buffer, 0 ) = mm_byteswap_epi32( cast_m128i( data ) );
|
||||
casti_m128i( state->buffer, 0 ) = mm_bswap_32( cast_m128i( data ) );
|
||||
// padding of partial block
|
||||
casti_m128i( state->buffer, 1 ) =
|
||||
_mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
|
||||
@@ -324,8 +324,8 @@ HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output,
|
||||
// full blocks
|
||||
for ( i = 0; i < blocks; i++ )
|
||||
{
|
||||
rnd512( state, mm_byteswap_epi32( casti_m128i( data, 1 ) ),
|
||||
mm_byteswap_epi32( casti_m128i( data, 0 ) ) );
|
||||
rnd512( state, mm_bswap_32( casti_m128i( data, 1 ) ),
|
||||
mm_bswap_32( casti_m128i( data, 0 ) ) );
|
||||
data += MSG_BLOCK_BYTE_LEN;
|
||||
}
|
||||
|
||||
@@ -334,7 +334,7 @@ HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output,
|
||||
{
|
||||
// padding of partial block
|
||||
rnd512( state, _mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ),
|
||||
mm_byteswap_epi32( cast_m128i( data ) ) );
|
||||
mm_bswap_32( cast_m128i( data ) ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -542,7 +542,7 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
|
||||
_mm256_store_si256( (__m256i*)hash, t );
|
||||
|
||||
casti_m256i( b, 0 ) = mm256_byteswap_epi32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 0 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
|
||||
rnd512( state, zero, zero );
|
||||
|
||||
@@ -555,7 +555,7 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
|
||||
_mm256_store_si256( (__m256i*)hash, t );
|
||||
|
||||
casti_m256i( b, 1 ) = mm256_byteswap_epi32( casti_m256i( hash, 0 ) );
|
||||
casti_m256i( b, 1 ) = mm256_bswap_32( casti_m256i( hash, 0 ) );
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -587,8 +587,8 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
_mm_store_si128((__m128i*)&hash[0], t[0]);
|
||||
_mm_store_si128((__m128i*)&hash[4], t[1]);
|
||||
|
||||
casti_m128i( b, 0 ) = mm_byteswap_epi32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 1 ) = mm_byteswap_epi32( casti_m128i( hash, 1 ) );
|
||||
casti_m128i( b, 0 ) = mm_bswap_32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 1 ) = mm_bswap_32( casti_m128i( hash, 1 ) );
|
||||
|
||||
rnd512( state, zero, zero );
|
||||
|
||||
@@ -609,8 +609,8 @@ static void finalization512( hashState_luffa *state, uint32 *b )
|
||||
_mm_store_si128((__m128i*)&hash[0], t[0]);
|
||||
_mm_store_si128((__m128i*)&hash[4], t[1]);
|
||||
|
||||
casti_m128i( b, 2 ) = mm_byteswap_epi32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 3 ) = mm_byteswap_epi32( casti_m128i( hash, 1 ) );
|
||||
casti_m128i( b, 2 ) = mm_bswap_32( casti_m128i( hash, 0 ) );
|
||||
casti_m128i( b, 3 ) = mm_bswap_32( casti_m128i( hash, 1 ) );
|
||||
}
|
||||
#endif
|
||||
|
138
algo/lyra2/allium-4way.c
Normal file
138
algo/lyra2/allium-4way.c
Normal file
@@ -0,0 +1,138 @@
|
||||
#include "allium-gate.h"
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
#if defined (ALLIUM_4WAY)
|
||||
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/skein/skein-hash-4way.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
|
||||
typedef struct {
|
||||
blake256_4way_context blake;
|
||||
keccak256_4way_context keccak;
|
||||
cubehashParam cube;
|
||||
skein256_4way_context skein;
|
||||
hashState_groestl256 groestl;
|
||||
|
||||
} allium_4way_ctx_holder;
|
||||
|
||||
static __thread allium_4way_ctx_holder allium_4way_ctx;
|
||||
|
||||
bool init_allium_4way_ctx()
|
||||
{
|
||||
keccak256_4way_init( &allium_4way_ctx.keccak );
|
||||
cubehashInit( &allium_4way_ctx.cube, 256, 16, 32 );
|
||||
skein256_4way_init( &allium_4way_ctx.skein );
|
||||
init_groestl256( &allium_4way_ctx.groestl, 32 );
|
||||
return true;
|
||||
}
|
||||
|
||||
void allium_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (32)));
|
||||
uint32_t vhash32[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash64[8*4] __attribute__ ((aligned (64)));
|
||||
allium_4way_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &allium_4way_ctx, sizeof(allium_4way_ctx) );
|
||||
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
blake256_4way_close( &ctx.blake, vhash32 );
|
||||
|
||||
mm256_reinterleave_4x64( vhash64, vhash32, 256 );
|
||||
keccak256_4way( &ctx.keccak, vhash64, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash64 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0, (const byte*)hash0, 32 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash1, (const byte*)hash1, 32 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash2, (const byte*)hash2, 32 );
|
||||
cubehashReinit( &ctx.cube );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*)hash3, 32 );
|
||||
|
||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
|
||||
mm256_interleave_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||
skein256_4way( &ctx.skein, vhash64, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash64 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
update_and_final_groestl256( &ctx.groestl, hash0, hash0, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash1, hash1, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash2, hash2, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
sizeof(hashState_groestl256) );
|
||||
update_and_final_groestl256( &ctx.groestl, hash3, hash3, 256 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
blake256_4way_init( &allium_4way_ctx.blake );
|
||||
blake256_4way( &allium_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
|
||||
allium_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
22
algo/lyra2/allium-gate.c
Normal file
22
algo/lyra2/allium-gate.c
Normal file
@@ -0,0 +1,22 @@
|
||||
#include "allium-gate.h"
|
||||
|
||||
int64_t get_max64_0xFFFFLL() { return 0xFFFFLL; }
|
||||
|
||||
bool register_allium_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (ALLIUM_4WAY)
|
||||
gate->miner_thread_init = (void*)&init_allium_4way_ctx;
|
||||
gate->scanhash = (void*)&scanhash_allium_4way;
|
||||
gate->hash = (void*)&allium_4way_hash;
|
||||
#else
|
||||
gate->miner_thread_init = (void*)&init_allium_ctx;
|
||||
gate->scanhash = (void*)&scanhash_allium;
|
||||
gate->hash = (void*)&allium_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->set_target = (void*)&alt_set_target;
|
||||
gate->get_max64 = (void*)&get_max64_0xFFFFLL;
|
||||
return true;
|
||||
};
|
||||
|
||||
|
29
algo/lyra2/allium-gate.h
Normal file
29
algo/lyra2/allium-gate.h
Normal file
@@ -0,0 +1,29 @@
|
||||
#ifndef ALLIUM_GATE_H__
|
||||
#define ALLIUM_GATE_H__ 1
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
#include "lyra2.h"
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
#define ALLIUM_4WAY
|
||||
#endif
|
||||
|
||||
bool register_allium_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(ALLIUM_4WAY)
|
||||
|
||||
void allium_4way_hash( void *state, const void *input );
|
||||
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
bool init_allium_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void allium_hash( void *state, const void *input );
|
||||
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
bool init_allium_ctx();
|
||||
|
||||
#endif
|
||||
|
112
algo/lyra2/allium.c
Normal file
112
algo/lyra2/allium.c
Normal file
@@ -0,0 +1,112 @@
|
||||
#include "allium-gate.h"
|
||||
#include <memory.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#if defined(__AES__)
|
||||
#include "algo/groestl/aes_ni/hash-groestl256.h"
|
||||
#else
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#endif
|
||||
#include "lyra2.h"
|
||||
|
||||
typedef struct {
|
||||
sph_blake256_context blake;
|
||||
sph_keccak256_context keccak;
|
||||
cubehashParam cube;
|
||||
sph_skein256_context skein;
|
||||
#if defined (__AES__)
|
||||
hashState_groestl256 groestl;
|
||||
#else
|
||||
sph_groestl256_context groestl;
|
||||
#endif
|
||||
} allium_ctx_holder;
|
||||
|
||||
static __thread allium_ctx_holder allium_ctx;
|
||||
|
||||
bool init_allium_ctx()
|
||||
{
|
||||
sph_keccak256_init( &allium_ctx.keccak );
|
||||
cubehashInit( &allium_ctx.cube, 256, 16, 32 );
|
||||
sph_skein256_init( &allium_ctx.skein );
|
||||
#if defined (__AES__)
|
||||
init_groestl256( &allium_ctx.groestl, 32 );
|
||||
#else
|
||||
sph_groestl256_init( &allium_ctx.groestl );
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
void allium_hash(void *state, const void *input)
|
||||
{
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
allium_ctx_holder ctx __attribute__ ((aligned (32)));
|
||||
|
||||
memcpy( &ctx, &allium_ctx, sizeof(allium_ctx) );
|
||||
sph_blake256( &ctx.blake, input + 64, 16 );
|
||||
sph_blake256_close( &ctx.blake, hash );
|
||||
|
||||
sph_keccak256( &ctx.keccak, hash, 32 );
|
||||
sph_keccak256_close( &ctx.keccak, hash );
|
||||
|
||||
LYRA2RE( hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash, (const byte*)hash, 32 );
|
||||
|
||||
LYRA2RE( hash, 32, hash, 32, hash, 32, 1, 8, 8 );
|
||||
|
||||
sph_skein256( &ctx.skein, hash, 32 );
|
||||
sph_skein256_close( &ctx.skein, hash );
|
||||
|
||||
#if defined (__AES__)
|
||||
update_and_final_groestl256( &ctx.groestl, hash, hash, 256 );
|
||||
#else
|
||||
sph_groestl256( &ctx.groestl, hash, 32 );
|
||||
sph_groestl256_close( &ctx.groestl, hash );
|
||||
#endif
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x3ffff;
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
sph_blake256_init( &allium_ctx.blake );
|
||||
sph_blake256( &allium_ctx.blake, endiandata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], nonce );
|
||||
allium_hash( hash, endiandata );
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -47,8 +47,9 @@
|
||||
*/
|
||||
|
||||
int LYRA2REV2( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, const void *salt, uint64_t saltlen,
|
||||
uint64_t timeCost, const uint64_t nRows, const uint64_t nCols )
|
||||
const uint64_t pwdlen, const void *salt, const uint64_t saltlen,
|
||||
const uint64_t timeCost, const uint64_t nRows,
|
||||
const uint64_t nCols )
|
||||
{
|
||||
//====================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[16];
|
||||
@@ -73,6 +74,8 @@ int LYRA2REV2( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
: BLOCK_LEN_BLAKE2_SAFE_BYTES;
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
|
||||
// memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
|
||||
|
||||
//=== Getting the password + salt + basil padded with 10*1 ==========//
|
||||
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||
//but this ensures that the password copied locally will be overwritten as soon as possible
|
||||
@@ -209,8 +212,9 @@ int LYRA2REV2( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
}
|
||||
|
||||
int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, const void *salt, uint64_t saltlen,
|
||||
uint64_t timeCost, uint64_t nRows, uint64_t nCols )
|
||||
const uint64_t pwdlen, const void *salt, const uint64_t saltlen,
|
||||
const uint64_t timeCost, const uint64_t nRows,
|
||||
const uint64_t nCols )
|
||||
{
|
||||
//========================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[16];
|
||||
@@ -230,6 +234,8 @@ int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
|
||||
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
|
||||
|
||||
// memset( wholeMatrix, 0, ROW_LEN_BYTES * nRows );
|
||||
|
||||
//==== Getting the password + salt + basil padded with 10*1 ============//
|
||||
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
|
||||
//but this ensures that the password copied locally will be overwritten as soon as possible
|
||||
@@ -347,9 +353,9 @@ int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
|
||||
}
|
||||
|
||||
// Lyra2RE doesn't like the new wholeMatrix implementation
|
||||
int LYRA2RE( void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, const void *salt, uint64_t saltlen,
|
||||
uint64_t timeCost, const uint64_t nRows, const uint64_t nCols )
|
||||
int LYRA2RE( void *K, uint64_t kLen, const void *pwd, const uint64_t pwdlen,
|
||||
const void *salt, const uint64_t saltlen, const uint64_t timeCost,
|
||||
const uint64_t nRows, const uint64_t nCols )
|
||||
{
|
||||
//====================== Basic variables ============================//
|
||||
uint64_t _ALIGN(256) state[16];
|
||||
@@ -378,12 +384,12 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd,
|
||||
if (wholeMatrix == NULL)
|
||||
return -1;
|
||||
|
||||
#if defined (__AVX2__)
|
||||
memset_zero_m256i( (__m256i*)wholeMatrix, i/32 );
|
||||
#if defined(__AVX2__)
|
||||
memset_zero_256( (__m256i*)wholeMatrix, i>>5 );
|
||||
#elif defined(__AVX__)
|
||||
memset_zero_m128i( (__m128i*)wholeMatrix, i/16 );
|
||||
memset_zero_128( (__m128i*)wholeMatrix, i>>4 );
|
||||
#else
|
||||
memset(wholeMatrix, 0, i);
|
||||
memset( wholeMatrix, 0, i );
|
||||
#endif
|
||||
|
||||
uint64_t *ptrWord = wholeMatrix;
|
||||
@@ -406,8 +412,8 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd,
|
||||
memcpy(ptrByte, salt, saltlen);
|
||||
ptrByte += saltlen;
|
||||
|
||||
memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES
|
||||
- (saltlen + pwdlen) );
|
||||
// memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES
|
||||
// - (saltlen + pwdlen) );
|
||||
|
||||
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
|
||||
memcpy(ptrByte, &kLen, sizeof(int64_t));
|
||||
|
@@ -21,8 +21,9 @@
|
||||
#define LYRA2_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include "algo/sha/sha3-defs.h"
|
||||
|
||||
typedef unsigned char byte;
|
||||
//typedef unsigned char byte;
|
||||
|
||||
//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED)
|
||||
#define BLOCK_LEN_BLAKE2_SAFE_INT64 8 //512 bits (=64 bytes, =8 uint64_t)
|
||||
@@ -53,4 +54,6 @@ int LYRA2Z( uint64_t*, void *K, uint64_t kLen, const void *pwd,
|
||||
uint64_t pwdlen, const void *salt, uint64_t saltlen,
|
||||
uint64_t timeCost, uint64_t nRows, uint64_t nCols );
|
||||
|
||||
int LYRA2(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *salt, int32_t saltlen, int64_t timeCost, const int16_t nRows, const int16_t nCols);
|
||||
|
||||
#endif /* LYRA2_H_ */
|
||||
|
101
algo/lyra2/lyra2h-4way.c
Normal file
101
algo/lyra2/lyra2h-4way.c
Normal file
@@ -0,0 +1,101 @@
|
||||
#include "lyra2h-gate.h"
|
||||
|
||||
#ifdef LYRA2H_4WAY
|
||||
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
#include "lyra2.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
|
||||
__thread uint64_t* lyra2h_4way_matrix;
|
||||
|
||||
bool lyra2h_4way_thread_init()
|
||||
{
|
||||
return ( lyra2h_4way_matrix = _mm_malloc( LYRA2H_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static __thread blake256_4way_context l2h_4way_blake_mid;
|
||||
|
||||
void lyra2h_4way_midstate( const void* input )
|
||||
{
|
||||
blake256_4way_init( &l2h_4way_blake_mid );
|
||||
blake256_4way( &l2h_4way_blake_mid, input, 64 );
|
||||
}
|
||||
|
||||
void lyra2h_4way_hash( void *state, const void *input )
|
||||
{
|
||||
uint32_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2[8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash3[8] __attribute__ ((aligned (64)));
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
blake256_4way_context ctx_blake __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx_blake, &l2h_4way_blake_mid, sizeof l2h_4way_blake_mid );
|
||||
blake256_4way( &ctx_blake, input + (64*4), 16 );
|
||||
blake256_4way_close( &ctx_blake, vhash );
|
||||
|
||||
mm_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2h_4way_matrix, hash0, 32, hash0, 32, hash0, 32, 16, 16, 16 );
|
||||
LYRA2Z( lyra2h_4way_matrix, hash1, 32, hash1, 32, hash1, 32, 16, 16, 16 );
|
||||
LYRA2Z( lyra2h_4way_matrix, hash2, 32, hash2, 32, hash2, 32, 16, 16, 16 );
|
||||
LYRA2Z( lyra2h_4way_matrix, hash3, 32, hash3, 32, hash3, 32, 16, 16, 16 );
|
||||
|
||||
memcpy( state, hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
memcpy( state+64, hash2, 32 );
|
||||
memcpy( state+96, hash3, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep= vdata + 76; // 19*4
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &edata[i], pdata[i] );
|
||||
|
||||
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
lyra2h_4way_midstate( vdata );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
|
||||
be32enc( &edata[19], n );
|
||||
lyra2h_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
25
algo/lyra2/lyra2h-gate.c
Normal file
25
algo/lyra2/lyra2h-gate.c
Normal file
@@ -0,0 +1,25 @@
|
||||
#include "lyra2h-gate.h"
|
||||
#include "lyra2.h"
|
||||
|
||||
void lyra2h_set_target( struct work* work, double job_diff )
|
||||
{
|
||||
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
|
||||
}
|
||||
|
||||
bool register_lyra2h_algo( algo_gate_t* gate )
|
||||
{
|
||||
#ifdef LYRA2H_4WAY
|
||||
gate->miner_thread_init = (void*)&lyra2h_4way_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2h_4way;
|
||||
gate->hash = (void*)&lyra2h_4way_hash;
|
||||
#else
|
||||
gate->miner_thread_init = (void*)&lyra2h_thread_init;
|
||||
gate->scanhash = (void*)&scanhash_lyra2h;
|
||||
gate->hash = (void*)&lyra2h_hash;
|
||||
#endif
|
||||
gate->optimizations = AVX_OPT | AVX2_OPT;
|
||||
gate->get_max64 = (void*)&get_max64_0xffffLL;
|
||||
gate->set_target = (void*)&lyra2h_set_target;
|
||||
return true;
|
||||
};
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user