Compare commits

..

12 Commits

Author SHA1 Message Date
Jay D Dee
4b57ac0eb9 v3.7.4 2017-11-28 16:32:04 -05:00
Jay D Dee
6d1361c87f v3.7.3 2017-11-20 21:19:15 -05:00
Jay D Dee
ab39e88318 v3.7.2 2017-11-01 11:03:23 -04:00
Jay D Dee
8ff52e7ad6 v3.7.1 2017-10-31 00:25:24 -04:00
Jay D Dee
aaa48599ad v3.7.0 2017-10-17 11:38:59 -04:00
Jay D Dee
c76574b2cd v3.6.11 2017-10-12 15:14:37 -04:00
Jay D Dee
989fb42d20 v3.6.10 2017-10-12 11:49:40 -04:00
Jay D Dee
710c852f05 v3.6.9 2017-10-09 21:45:27 -04:00
Jay D Dee
39f089d3dc v3.6.8 2017-07-31 20:02:45 -04:00
Jay D Dee
ec4f6028a2 v3.6.7 2017-07-24 21:38:32 -04:00
Jay D Dee
f8907677f6 v3.6.6 2017-07-01 14:37:11 -04:00
Jay D Dee
7544cb956c v3.6.5 2017-05-19 16:38:26 -04:00
151 changed files with 23901 additions and 1104 deletions

1
.gitignore vendored
View File

@@ -11,7 +11,6 @@ autom4te.cache
Makefile
Makefile.in
INSTALL
configure
configure.lineno
depcomp
missing

View File

@@ -5,19 +5,31 @@
# ex: docker run -it --rm cpuminer-opt:latest -a cryptonight -o cryptonight.eu.nicehash.com:3355 -u 1MiningDW2GKzf4VQfmp4q2XoUvR6iy6PD.worker1 -p x -t 3
#
FROM ubuntu:16.04
RUN BUILD_DEPS="build-essential \
libssl-dev \
libgmp-dev \
libcurl4-openssl-dev \
libjansson-dev \
automake" && \
# Build
FROM ubuntu:16.04 as builder
apt-get update && \
apt-get install -y ${BUILD_DEPS}
RUN apt-get update \
&& apt-get install -y \
build-essential \
libssl-dev \
libgmp-dev \
libcurl4-openssl-dev \
libjansson-dev \
automake \
&& rm -rf /var/lib/apt/lists/*
COPY . /app/
RUN cd /app/ && ./build.sh
RUN cd /app/ && ./build.sh
ENTRYPOINT ["/app/cpuminer"]
# App
FROM ubuntu:16.04
RUN apt-get update \
&& apt-get install -y \
libcurl3 \
libjansson4 \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /app/cpuminer .
ENTRYPOINT ["./cpuminer"]
CMD ["-h"]

View File

@@ -22,29 +22,6 @@ cpuminer_SOURCES = \
api.c \
sysinfos.c \
algo-gate-api.c\
algo/groestl/sph_groestl.c \
algo/skein/sph_skein.c \
algo/bmw/sph_bmw.c \
algo/shavite/sph_shavite.c \
algo/shavite/shavite.c \
algo/echo/sph_echo.c \
algo/blake/sph_blake.c \
algo/blake/sph_blake2b.c \
algo/heavy/sph_hefty1.c \
algo/blake/mod_blakecoin.c \
algo/luffa/sph_luffa.c \
algo/cubehash/sph_cubehash.c \
algo/simd/sph_simd.c \
algo/hamsi/sph_hamsi.c \
algo/fugue/sph_fugue.c \
algo/gost/sph_gost.c \
algo/jh/sph_jh.c \
algo/keccak/sph_keccak.c \
algo/keccak/keccak.c\
algo/sha/sph_sha2.c \
algo/sha/sph_sha2big.c \
algo/shabal/sph_shabal.c \
algo/whirlpool/sph_whirlpool.c\
crypto/blake2s.c \
crypto/oaes_lib.c \
crypto/c_keccak.c \
@@ -62,26 +39,44 @@ cpuminer_SOURCES = \
algo/argon2/ar2/ar2-scrypt-jane.c \
algo/argon2/ar2/blake2b.c \
algo/axiom.c \
algo/blake/sph_blake.c \
algo/blake/blake-hash-4way.c \
algo/blake/blake-gate.c \
algo/blake/blake.c \
algo/blake/blake-4way.c \
algo/blake/sph_blake2b.c \
algo/blake/blake2b.c \
algo/blake/blake2s.c \
algo/blake/mod_blakecoin.c \
algo/blake/blakecoin.c \
algo/blake/decred-gate.c \
algo/blake/decred.c \
algo/blake/decred-4way.c \
algo/blake/pentablake-gate.c \
algo/blake/pentablake-4way.c \
algo/blake/pentablake.c \
algo/bmw/sph_bmw.c \
algo/bmw/bmw256.c \
algo/cubehash/sse2/cubehash_sse2.c\
algo/cryptonight/cryptolight.c \
algo/cryptonight/cryptonight-common.c\
algo/cryptonight/cryptonight-aesni.c\
algo/cryptonight/cryptonight.c\
algo/cubehash/sph_cubehash.c \
algo/cubehash/sse2/cubehash_sse2.c\
algo/drop.c \
algo/echo/sph_echo.c \
algo/echo/aes_ni/hash.c\
algo/fresh.c \
algo/gost/sph_gost.c \
algo/groestl/sph_groestl.c \
algo/groestl/groestl.c \
algo/groestl/myr-groestl.c \
algo/groestl/aes_ni/hash-groestl.c \
algo/groestl/aes_ni/hash-groestl256.c \
algo/fugue/sph_fugue.c \
algo/hamsi/sph_hamsi.c \
algo/haval/haval.c\
algo/heavy/sph_hefty1.c \
algo/heavy/heavy.c \
algo/heavy/bastion.c \
algo/hmq1725.c \
@@ -90,7 +85,19 @@ cpuminer_SOURCES = \
algo/hodl/hodl-wolf.c \
algo/hodl/sha512_avx.c \
algo/hodl/sha512_avx2.c \
algo/jh/sph_jh.c \
algo/jh/jh-hash-4way.c \
algo/jh/jha-gate.c \
algo/jh/jha-4way.c \
algo/jh/jha.c \
algo/keccak/sph_keccak.c \
algo/keccak/keccak.c\
algo/keccak/keccak-hash-4way.c \
algo/keccak/keccak-4way.c\
algo/keccak/keccak-gate.c \
algo/keccak/sse2/keccak.c \
algo/lbry.c \
algo/luffa/sph_luffa.c \
algo/luffa/luffa.c \
algo/luffa/sse2/luffa_for_sse2.c \
algo/lyra2/lyra2.c \
@@ -99,41 +106,65 @@ cpuminer_SOURCES = \
algo/lyra2/lyra2re.c \
algo/lyra2/zcoin.c \
algo/lyra2/lyra2z330.c \
algo/keccak/sse2/keccak.c \
algo/m7m.c \
algo/neoscrypt.c \
algo/nist5.c \
algo/nist5/nist5-gate.c \
algo/nist5/nist5-4way.c \
algo/nist5/nist5.c \
algo/pluck.c \
algo/polytimos/polytimos-gate.c \
algo/polytimos/polytimos.c \
algo/quark/quark.c \
algo/qubit/qubit.c \
algo/qubit/deep.c \
algo/ripemd/sph_ripemd.c \
algo/scrypt.c \
algo/scryptjane/scrypt-jane.c \
algo/sha/sph_sha2.c \
algo/sha/sph_sha2big.c \
algo/sha/sha2.c \
algo/sha/sha256t.c \
algo/shabal/sph_shabal.c \
algo/shavite/sph_shavite.c \
algo/shavite/shavite.c \
algo/simd/sph_simd.c \
algo/simd/sse2/nist.c \
algo/simd/sse2/vector.c \
algo/skein/sph_skein.c \
algo/skein/skein-hash-4way.c \
algo/skein/skein.c \
algo/skein/skein-4way.c \
algo/skein/skein-gate.c \
algo/skein/skein2.c \
algo/s3.c \
algo/skein/skein2-4way.c \
algo/skein/skein2-gate.c \
algo/skunk.c \
algo/sm3/sm3.c \
algo/tiger/sph_tiger.c \
algo/timetravel.c \
algo/timetravel10.c \
algo/tribus/tribus-gate.c \
algo/tribus/tribus.c \
algo/tribus/tribus-4way.c \
algo/veltor.c \
algo/whirlpool/sph_whirlpool.c \
algo/whirlpool/whirlpool-hash-4way.c \
algo/whirlpool/whirlpool-gate.c \
algo/whirlpool/whirlpool-4way.c \
algo/whirlpool/whirlpool.c \
algo/whirlpool/whirlpoolx.c \
algo/x11/phi1612.c \
algo/x11/x11.c \
algo/x11/x11evo.c \
algo/x11/x11gost.c \
algo/x11/c11.c \
algo/x13/x13.c \
algo/x13/x13sm3.c \
algo/x14/x14.c \
algo/x15/x15.c \
algo/x17/x17.c \
algo/xevan.c \
algo/yescrypt/yescrypt.c \
algo/yescrypt/yescrypt-common.c \
algo/yescrypt/sha256_Y.c\
algo/yescrypt/yescrypt-simd.c\
algo/zr5.c

View File

@@ -35,19 +35,22 @@ Supported Algorithms
heavy Heavy
hmq1725 Espers
hodl Hodlcoin
jha jackpotcoin
keccak Keccak
lbry LBC, LBRY Credits
luffa Luffa
lyra2re lyra2
lyra2rev2 lyrav2, Vertcoin
lyra2rev2 lyra2v2, Vertcoin
lyra2z Zcoin (XZC)
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
m7m Magi (XMG)
myr-gr Myriad-Groestl
neoscrypt NeoScrypt(128, 2, 1)
nist5 Nist5
pluck Pluck:128 (Supcoin)
pentablake Pentablake
phi1612 phi, LUX coin
pluck Pluck:128 (Supcoin)
polytimos
quark Quark
qubit Qubit
scrypt scrypt(1024, 1, 1) (default)
@@ -58,7 +61,10 @@ Supported Algorithms
shavite3 Shavite3
skein Skein+Sha (Skeincoin)
skein2 Double Skein (Woodcoin)
skunk Signatum (SIGT)
timetravel Machinecoin (MAC)
timetravel10 Bitcore
tribus Denarius (DNR)
vanilla blake256r8vnl (VCash)
veltor
whirlpool
@@ -67,11 +73,13 @@ Supported Algorithms
x11evo Revolvercoin
x11gost sib (SibCoin)
x13 X13
x13sm3 hsr (Hshare)
x14 X14
x15 X15
x17
xevan Bitsend
yescrypt
yescrypt Globalboost-Y (BSTY)
yescryptr16 Yenten (YTN)
zr5 Ziftr
Requirements
@@ -115,6 +123,10 @@ forum at:
https://bitcointalk.org/index.php?topic=1326803.0
All problem reports must be accompanied by a proper definition.
This should include how the problem occurred, the command line and
output from the miner showing the startup and any errors.
Donations
---------

View File

@@ -1,6 +1,9 @@
This file is included in the Windows binary package. Compile instructions
for Linux and Windows can be found in RELEASE_NOTES.
cpuminer is a console program that is executed from a DOS command prompt.
There is no GUI and no mouse support.
Choose the exe that best matches your CPU's features or use trial and
error to find the fastest one that doesn't crash. Pay attention to
the features listed at cpuminer startup to ensure you are mining at
@@ -8,15 +11,23 @@ optimum speed using all the available features.
Architecture names and compile options used are only provided for Intel
Core series. Pentium and Celeron often have fewer features.
AMD is YMMV, see previous paragraph.
Exe name Compile opts Arch name
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
these CPUs. Some algos may crash the miner with an invalid instruction.
Users are recommended to use an unoptimized miner such as cpuminer-multi.
cpuminer-sse2.exe -march=core2, Core2
cpuminer-sse42.exe -march=corei7, Nehalem
cpuminer-aes-sse42.exe -maes -msse4.2 Westmere
cpuminer-aes-avx.exe -march=corei7-avx, Sandybridge, Ivybridge
cpuminer-aes-avx2.exe -march=core-avx2, Haswell, Broadwell, Skylake, Kabylake
Exe name Compile opts Arch name
cpuminer-sse2.exe -march=core2 Core2
cpuminer-sse42.exe -march=corei7 Nehalem
cpuminer-aes-sse42.exe "-maes -msse4.2" Westmere
cpuminer-aes-avx.exe "-march=corei7-avx" Sandybridge, Ivybridge
cpuminer-aes-avx2.exe "-march=core-avx2" Haswell, Broadwell, Skylake, Kabylake
cpuminer-4way.exe "-march=core-avx2 -DFOUR_WAY"
4way requires a CPU with AES and AVX2. It is still under development and
only a few algos are supported. See change log in RELEASE_NOTES in source
package for supported algos.
There is no binary support available for SHA on AMD Ryzen CPUs.

View File

@@ -6,6 +6,22 @@ compile flag.
HW SHA support is only available when compiled from source, Windows binaries
are not yet available.
cpuminer-opt is a console program, if you're using a mouse you're doing it
wrong.
Security warning
----------------
Miner programs are often flagged as malware by antivirus programs. This is
a false positive, they are flagged simply because they are miners. The source
code is open for anyone to inspect. If you don't trust the software, don't use
it.
The cryptographic code has been taken from trusted sources but has been
modified for speed at the expense of accepted security practices. This
code should not be imported into applications where secure cryptography is
required.
Compile Instructions
--------------------
@@ -46,9 +62,16 @@ pthreads
zlib
SHA support on AMD Ryzen CPUs requires gcc version 5 or higher and openssl 1.1
or higher. Additional compile options may also be required such as
or higher. Reports of improved performance on Ryzen when using openssl 1.0.2
have been due to AVX and AVX2 optimizations added to that version.
Additional improvements are expected on Ryzen with openssl 1.1.
"-march=znver1" or "-msha".
Additional instructions for static compilation can be found here:
https://lxadm.com/Static_compilation_of_cpuminer
Static builds should only be considered in a homogeneous HW and SW environment.
Local builds will always have the best performance and compatibility.
Extract cpuminer source.
tar xvzf cpuminer-opt-x.y.z.tar.gz
@@ -60,10 +83,29 @@ Run ./build.sh to build on Linux or execute the following commands.
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11" ./configure --with-curl
make
Additional optional compile flags, add the following to CFLAGS to activate:
-DUSE_SPH_SHA
SPH may give slightly better performance on algos that use sha256 when using
openssl 1.0.1 or older. Openssl 1.0.2 adds AVX2 and 1.1 adds SHA and perform
better than SPH.
-DFOUR_WAY
4 way will give much better performance on supported algos with CPUs
that have AVX2 and should only be used on CPUs with AVX2. 4 way algo
support will be added incrementally, see change log below for supported algos.
Start mining.
./cpuminer -a algo -o url -u username -p password
Windows
The following is how the Windows binary releases are built. It's old and
not very good but it works, for me anyway.
Building on Windows prerequisites:
msys
@@ -100,6 +142,10 @@ Run winbuild.sh to build on Windows or execute the following commands.
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
make
Start mining
cpuminer.exe -a algo -o url -u user -p password
The following tips may be useful for older AMD CPUs.
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
@@ -118,6 +164,82 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
Change Log
----------
v3.7.4
Removed unnecessary build options.
Added 4way support for tribus and nist5.
v3.7.3
Added polytimos algo.
Introducing 4-way AVX2 optimization giving up to 4x performance improvement
on many compute bound algos. First supported algos: skein, skein2, blake &
keccak. This feature is only available when compiled from source. See above
for instructions how to enable 4-way during compilation.
Updated Dockerfile.
v3.7.2
Fixed yescryptr16
Changed default sha256 and sha512 to openssl. This should be used when
compiling with openssl 1.0.2 or higher (Ubuntu 16.04).
This should increase the hashrate for yescrypt, yescryptr16, m7m, xevan, skein,
myr-gr & others when openssl 1.0.2 is installed.
Users with openssl 1.0.1 (Ubuntu 14.04) may get better performance by adding
"-DUSE_SPH_SHA" to CFLAGS.
Windows binaries are compiled with -DUSE_SPH_SHA and won't get the speedup.
v3.7.1
Added yescryptr16 algo for Yenten coin
Added SHA support to yescrypt and yescryptr16
Small code cleanup
v3.7.0
Fixed x14 misalignment bug.
Fixed decred stake version bug.
Getwork fixes for algos that use big endian data encoding: m7m, zr5, neoscrypt,
decred.
v3.6.10
Fixed misalignment bug in hsr.
v3.6.9
Added phi1612 algo for LUX coin
Added x13sm3 algo, alias hsr, for Hshare coin
v3.6.8
Fixed timetravel10 on Windows.
v3.6.7
Skunk algo added.
Tribus a little faster.
Minor restructuring.
v3.6.6
added tribus algo for Denarius (DNR)
configure removed from .gitignore. This should allow git clone to compile
on Windows/mingw.
Fixed CPU temperature monitoring on some CPUs (Linux only).
Fixed a compile error on FreeBSD (unsupported YMMV).
v3.6.5
Cryptonight a little faster.
Added jha algo (Jackpotcoin) with AES optimizations.
v3.6.4
Added support for Bitcore (BTX) using the timetravel10 algo, optimized for

View File

@@ -114,8 +114,8 @@ void init_algo_gate( algo_gate_t* gate )
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
gate->set_target = (void*)&std_set_target;
gate->work_decode = (void*)&std_work_decode;
gate->submit_getwork_result = (void*)&std_submit_getwork_result;
gate->work_decode = (void*)&std_le_work_decode;
gate->submit_getwork_result = (void*)&std_le_submit_getwork_result;
gate->build_extraheader = (void*)&std_build_extraheader;
gate->set_work_data_endian = (void*)&do_nothing;
gate->calc_network_diff = (void*)&std_calc_network_diff;
@@ -169,6 +169,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
case ALGO_HODL: register_hodl_algo ( gate ); break;
case ALGO_JHA: register_jha_algo ( gate ); break;
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
case ALGO_LBRY: register_lbry_algo ( gate ); break;
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
@@ -181,7 +182,9 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
case ALGO_NIST5: register_nist5_algo ( gate ); break;
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
case ALGO_PHI1612: register_phi1612_algo ( gate ); break;
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
case ALGO_QUARK: register_quark_algo ( gate ); break;
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
@@ -191,9 +194,10 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
case ALGO_SKEIN: register_skein_algo ( gate ); break;
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
case ALGO_S3: register_s3_algo ( gate ); break;
case ALGO_SKUNK: register_skunk_algo ( gate ); break;
case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break;
case ALGO_TIMETRAVEL10: register_timetravel10_algo( gate ); break;
case ALGO_TRIBUS: register_tribus_algo ( gate ); break;
case ALGO_VANILLA: register_vanilla_algo ( gate ); break;
case ALGO_VELTOR: register_veltor_algo ( gate ); break;
case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break;
@@ -202,11 +206,13 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
case ALGO_X11GOST: register_sib_algo ( gate ); break;
case ALGO_X13: register_x13_algo ( gate ); break;
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
case ALGO_X14: register_x14_algo ( gate ); break;
case ALGO_X15: register_x15_algo ( gate ); break;
case ALGO_X17: register_x17_algo ( gate ); break;
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
case ALGO_ZR5: register_zr5_algo ( gate ); break;
// restore warnings
@@ -276,17 +282,22 @@ const char* const algo_alias_map[][2] =
{ "droplp", "drop" },
{ "espers", "hmq1725" },
{ "flax", "c11" },
{ "hsr", "x13sm3" },
{ "jackpot", "jha" },
{ "jane", "scryptjane" },
{ "lyra2", "lyra2re" },
{ "lyra2v2", "lyra2rev2" },
{ "lyra2zoin", "lyra2z330" },
{ "myriad", "myr-gr" },
{ "neo", "neoscrypt" },
{ "phi", "phi1612" },
// { "sia", "blake2b" },
{ "sib", "x11gost" },
{ "timetravel8", "timetravel" },
{ "yes", "yescrypt" },
{ "ziftr", "zr5" },
{ "yenten", "yescryptr16" },
{ "yescryptr8", "yescrypt" },
{ "zcoin", "lyra2z" },
{ "zoin", "lyra2z330" },
{ NULL, NULL }

View File

@@ -215,18 +215,20 @@ int64_t get_max64_0xffffLL();
void std_set_target ( struct work *work, double job_diff );
void scrypt_set_target( struct work *work, double job_diff );
bool std_work_decode( const json_t *val, struct work *work );
bool std_le_work_decode( const json_t *val, struct work *work );
bool std_be_work_decode( const json_t *val, struct work *work );
bool jr2_work_decode( const json_t *val, struct work *work );
bool std_submit_getwork_result( CURL *curl, struct work *work );
bool std_le_submit_getwork_result( CURL *curl, struct work *work );
bool std_be_submit_getwork_result( CURL *curl, struct work *work );
bool jr2_submit_getwork_result( CURL *curl, struct work *work );
void std_le_build_stratum_request( char *req, struct work *work );
void std_be_build_stratum_request( char *req, struct work *work );
void jr2_build_stratum_request ( char *req, struct work *work );
// set_work_data_endian target, default is do_nothing;
void swab_work_data( struct work *work );
// Default is do_nothing (assumed LE)
void set_work_data_big_endian( struct work *work );
double std_calc_network_diff( struct work *work );

View File

@@ -1,5 +1,3 @@
#include "miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>

114
algo/blake/blake-4way.c Normal file
View File

@@ -0,0 +1,114 @@
#include "blake-gate.h"
#include "sph_blake.h"
#include "blake-hash-4way.h"
#include <string.h>
#include <stdint.h>
#include <memory.h>
#if defined (BLAKE_4WAY)
/*
 * Hash four interleaved inputs with the 4-way BLAKE-256 implementation.
 *
 * input: interleaved message data, passed unchanged to blake256_4way()
 *        together with a length argument of 16.
 * state: receives the four de-interleaved digests back to back, 32 bytes
 *        each (lane k at byte offset 32*k), so 128 bytes are written.
 */
void blakehash_4way(void *state, const void *input)
{
     uint32_t hash0[16] __attribute__ ((aligned (64)));
     uint32_t hash1[16] __attribute__ ((aligned (64)));
     uint32_t hash2[16] __attribute__ ((aligned (64)));
     uint32_t hash3[16] __attribute__ ((aligned (64)));
     uint32_t vhash[16*4] __attribute__ ((aligned (64)));
     // Byte pointer so the output offsets do not rely on void* arithmetic.
     uint8_t *out = (uint8_t*)state;
     blake256_4way_context ctx;

     blake256_4way_init( &ctx );
     blake256_4way( &ctx, input, 16 );
     blake256_4way_close( &ctx, vhash );

     m128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );

     // One digest per lane; each lane must be copied from its own buffer.
     memcpy( out,      hash0, 32 );
     memcpy( out + 32, hash1, 32 );
     memcpy( out + 64, hash2, 32 );
     memcpy( out + 96, hash3, 32 );
}
/*
 * Scan nonces for blake four at a time.
 *
 * The 20-word header is byte-swapped, replicated into four lanes of vdata,
 * and word 19 of each lane is set to n, n+1, n+2 and n+3 before hashing.
 * For every lane whose digest passes the hash[7] == 0 pre-check and
 * fulltest(), work->nfound[lane] is set and the nonce is stored in
 * work->nonces[lane]. A hit in lane 0 also updates work->data[19].
 *
 * Returns the number of lanes that found a share in the last iteration;
 * *hashes_done receives n - first_nonce + 1.
 */
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done )
{
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
   uint32_t hash[4*8] __attribute__ ((aligned (64)));
   uint32_t _ALIGN(32) endiandata[20];
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   uint32_t n = first_nonce;
   uint32_t *nonces = work->nonces;
   bool *found = work->nfound;
   int num_found = 0;

   // we need big endian data...
   swab32_array( endiandata, pdata, 20 );

   m128_interleave_4x32( vdata, endiandata, endiandata, endiandata,
                         endiandata, 640 );

   // Word 19 of the four lanes occupies vdata[76..79], the last four
   // elements of vdata, so the lane stride must be 1. A stride of 2 would
   // write past the end of the array for lanes 2 and 3.
   uint32_t *noncep = vdata + 76;   // 19*4

   do {
      for ( int lane = 0; lane < 4; lane++ )
      {
         found[lane] = false;
         be32enc( noncep + lane, n + lane );
      }

      blakehash_4way( hash, vdata );

      for ( int lane = 0; lane < 4; lane++ )
      {
         // Each lane's 8-word digest must be tested on its own.
         uint32_t *lane_hash = hash + 8*lane;

         if ( lane_hash[7] == 0 && fulltest( lane_hash, ptarget ) )
         {
            found[lane] = true;
            num_found++;
            nonces[lane] = n + lane;
            if ( lane == 0 )
               pdata[19] = n;
         }
      }
      n += 4;

   } while ( (num_found == 0) && (n < max_nonce)
             && !work_restart[thr_id].restart );

   *hashes_done = n - first_nonce + 1;
   return num_found;
}
#endif

26
algo/blake/blake-gate.c Normal file
View File

@@ -0,0 +1,26 @@
#include "blake-gate.h"
// Returns the constant 0x7ffff used as blake's get_max64 value.
int64_t blake_get_max64 ()
{
   const int64_t blake_max64 = 0x7ffffLL;
   return blake_max64;
}
// Fill in the gate entries for the blake algo. The 4-way functions are
// installed when BLAKE_4WAY is defined, the scalar ones otherwise.
// An 8-way (AVX2) variant is not wired in.
// Always returns true.
bool register_blake_algo( algo_gate_t* gate )
{
#if defined(BLAKE_4WAY)
  gate->scanhash      = (void*)&scanhash_blake_4way;
  gate->hash          = (void*)&blakehash_4way;
  gate->optimizations = SSE2_OPT | AVX_OPT;
#else
  gate->scanhash      = (void*)&scanhash_blake;
  gate->hash          = (void*)&blakehash;
  gate->optimizations = SSE2_OPT;
#endif
  gate->get_max64     = (void*)&blake_get_max64;
  return true;
}

21
algo/blake/blake-gate.h Normal file
View File

@@ -0,0 +1,21 @@
// Declarations for the blake algo: decides whether the 4-way build is
// enabled and declares the hash and scanhash entry points.
#ifndef __BLAKE_GATE_H__
#define __BLAKE_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
// BLAKE_4WAY is defined only when FOUR_WAY is defined and the compiler
// defines __AVX__.
#if defined(FOUR_WAY) && defined(__AVX__)
#define BLAKE_4WAY
#endif
// 4-way entry points, declared only in BLAKE_4WAY builds.
#if defined (BLAKE_4WAY)
void blakehash_4way(void *state, const void *input);
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
// Entry points declared in every build.
void blakehash( void *state, const void *input );
int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif

1163
algo/blake/blake-hash-4way.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,105 @@
/* $Id: sph_blake.h 252 2011-06-07 17:55:14Z tp $ */
/**
* BLAKE interface. BLAKE is a family of functions which differ by their
* output size; this implementation defines BLAKE for output sizes 224,
* 256, 384 and 512 bits. This implementation conforms to the "third
* round" specification.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_blake.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
/*
 * Include guard. The macro defined here must be the same one tested by the
 * #ifndef, otherwise a second inclusion redefines the typedefs below.
 */
#ifndef __BLAKE_HASH_4WAY__
#define __BLAKE_HASH_4WAY__

#ifdef __cplusplus
extern "C"{
#endif

#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "avxdefs.h"

/**
 * Output size (in bits) for BLAKE-256.
 */
#define SPH_SIZE_blake256   256

#if SPH_64

/**
 * Output size (in bits) for BLAKE-512.
 */
#define SPH_SIZE_blake512   512

#endif

#ifdef __AVX__
/**
 * Context for the 4-way BLAKE-256 functions. Every state member is an
 * array of __m128i; T0/T1 are 32-bit counters and ptr is a size_t offset.
 */
typedef struct {
  __m128i buf[16] __attribute__ ((aligned (64)));
  __m128i H[8];
  __m128i S[4];
  size_t ptr;
  sph_u32 T0, T1;
} blake_4way_small_context;

typedef blake_4way_small_context blake256_4way_context;

/* 4-way BLAKE-256 API; cc points to a blake256_4way_context. */
void blake256_4way_init(void *cc);
void blake256_4way(void *cc, const void *data, size_t len);
void blake256_4way_close(void *cc, void *dst);
void blake256_4way_addbits_and_close(
        void *cc, unsigned ub, unsigned n, void *dst);

#endif

#ifdef __AVX2__

/**
 * Context for the 4-way BLAKE-512 functions. Same layout as the small
 * context but with __m256i members and 64-bit T0/T1 counters.
 */
typedef struct {
  __m256i buf[16] __attribute__ ((aligned (64)));
  __m256i H[8];
  __m256i S[4];
  size_t ptr;
  sph_u64 T0, T1;
} blake_4way_big_context;

typedef blake_4way_big_context blake512_4way_context;

/* 4-way BLAKE-512 API; cc points to a blake512_4way_context. */
void blake512_4way_init(void *cc);
void blake512_4way(void *cc, const void *data, size_t len);
void blake512_4way_close(void *cc, void *dst);
void blake512_4way_addbits_and_close(
        void *cc, unsigned ub, unsigned n, void *dst);

#endif

#ifdef __cplusplus
}
#endif

#endif

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include "sph_blake.h"
@@ -90,18 +89,3 @@ int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
return 0;
}
// changed to get_max64_0x3fffffLL in cpuminer-multi-decred
// Here the value returned for blake is 0x7ffff.
int64_t blake_get_max64 ()
{
   const int64_t blake_max64 = 0x7ffffLL;
   return blake_max64;
}
// Install the scalar blake callbacks into the gate: get_max64, hash and
// scanhash. Always returns true.
bool register_blake_algo( algo_gate_t* gate )
{
  gate->get_max64 = (void*)&blake_get_max64;
  gate->hash      = (void*)&blakehash;
  gate->scanhash  = (void*)&scanhash_blake;
  return true;
}

View File

@@ -3,16 +3,13 @@
* tpruvot@github 2015-2016
*/
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>
#include <stdint.h>
#include "algo/blake/sph_blake2b.h"
static __thread sph_blake2b_ctx s_midstate;
static __thread sph_blake2b_ctx s_ctx;
//static __thread sph_blake2b_ctx s_midstate;
//static __thread sph_blake2b_ctx s_ctx;
#define MIDLEN 76
#define A 64
@@ -28,6 +25,7 @@ void blake2b_hash(void *output, const void *input)
memcpy(output, hash, 32);
}
/*
static void blake2b_hash_end(uint32_t *output, const uint32_t *input)
{
s_ctx.outlen = MIDLEN;
@@ -35,6 +33,7 @@ static void blake2b_hash_end(uint32_t *output, const uint32_t *input)
sph_blake2b_update(&s_ctx, (uint8_t*) &input[MIDLEN/4], 80 - MIDLEN);
sph_blake2b_final(&s_ctx, (uint8_t*) output);
}
*/
int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
@@ -220,6 +219,8 @@ bool register_blake2b_algo( algo_gate_t* gate )
gate->hash = (void*)&blake2b_hash;
gate->calc_network_diff = (void*)&blake2b_calc_network_diff;
gate->build_stratum_request = (void*)&blake2b_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->build_extraheader = (void*)&blake2b_build_extraheader;
gate->get_new_work = (void*)&blake2b_get_new_work;
gate->get_max64 = (void*)&blake2b_get_max64;

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#define BLAKE32_ROUNDS 8
#include "sph_blake.h"

153
algo/blake/decred-4way.c Normal file
View File

@@ -0,0 +1,153 @@
#include "decred-gate.h"
#include "sph_blake.h"
#include "blake-hash-4way.h"
#include <string.h>
#include <stdint.h>
#include <memory.h>
#include <unistd.h>
#if defined (DECRED_4WAY)
// Per-thread midstate storage. blake_mid is referenced only inside the
// commented-out midstate block below; ctx_midstate_done is reset to false
// by scanhash_decred_4way before each scan.
static __thread blake256_4way_context blake_mid;
static __thread bool ctx_midstate_done = false;
// Hash a 4-way interleaved decred header (180 bytes per lane).
//
// As written this is a debugging variant: it runs the scalar sph_blake256
// over lane 0 (sin0) and also the 4-way hash over the whole input, but only
// the scalar lane 0 digest (32 bytes) is copied to state. The 4-way result
// in hash0..hash3 is computed and then discarded.
// NOTE(review): callers that expect four digests in state will only find
// one; confirm before enabling DECRED_4WAY.
void decred_hash_4way( void *state, const void *input )
{
uint32_t hash0[16] __attribute__ ((aligned (64)));
uint32_t hash1[16] __attribute__ ((aligned (64)));
uint32_t hash2[16] __attribute__ ((aligned (64)));
uint32_t hash3[16] __attribute__ ((aligned (64)));
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
blake256_4way_context ctx __attribute__ ((aligned (64)));
sph_blake256_context ctx2 __attribute__ ((aligned (64)));
uint32_t hash[16] __attribute__ ((aligned (64)));
// De-interleaved copies of the four lanes, 45 words (180 bytes) each.
// Only sin0 is read afterwards.
uint32_t sin0[45], sin1[45], sin2[45], sin3[45];
m128_deinterleave_4x32( sin0, sin1, sin2, sin3, (uint32_t*)input, 180*8 );
// NOTE(review): tail and tail_len are used only by the commented-out
// midstate code. The initialisation drops the const qualifier of input and
// does arithmetic on a void pointer (a GNU extension).
void *tail = input + DECRED_MIDSTATE_LEN;
int tail_len = 180 - DECRED_MIDSTATE_LEN;
// #define MIDSTATE_LEN 128
/*
uint8_t *ending = (uint8_t*) input;
ending += MIDSTATE_LEN;
if ( !ctx_midstate_done )
{
blake256_4way_init( &blake_mid );
blake256_4way( &blake_mid, input, DECRED_MIDSTATE_LEN );
ctx_midstate_done = true;
}
memcpy( &ctx, &blake_mid, sizeof(blake_mid) );
blake256_4way( &ctx, tail, tail_len );
blake256_4way_close( &ctx, vhash );
*/
// Scalar reference hash of lane 0; this is the digest returned in state.
sph_blake256_init( &ctx2 );
sph_blake256( &ctx2, sin0, 180 );
sph_blake256_close( &ctx2, hash );
// Full 4-way hash of the interleaved input; the result is not returned.
blake256_4way_init( &ctx );
blake256_4way( &ctx, input, 180 );
blake256_4way_close( &ctx, vhash );
m128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
/*
for ( int i = 0; i < 8; i++ )
if ( hash[i] != hash0[i] )
printf(" hash mismatch, i = %u\n",i);
printf("hash: %08lx %08lx %08lx %08lx\n", *hash, *(hash+1),
*(hash+2), *(hash+3) );
printf("hash0: %08lx %08lx %08lx %08lx\n", *hash0, *(hash0+1),
*(hash0+2), *(hash0+3) );
printf("\n");
*/
// memcpy( state, hash0, 32 );
// memcpy( state+32, hash1, 32 );
// memcpy( state+64, hash1, 32 );
// memcpy( state+96, hash1, 32 );
// NOTE(review): the disabled copies above use hash1 for lanes 2 and 3;
// they would need hash2 and hash3 if re-enabled.
memcpy( state, hash, 32 );
}
/*
 * Scan nonces for decred using the 4-way data layout.
 *
 * work->data (180 bytes) is replicated into four interleaved lanes of
 * vdata and the nonce word of lane k is set to n+k before each hash.
 * Only the first digest returned by decred_hash_4way() is tested, because
 * that function currently writes just the lane 0 digest; on a hit
 * work->nfound[0], work->nonces[0] and work->data[DECRED_NONCE_INDEX] are
 * updated.
 * NOTE(review): n still advances by 4 per iteration, so nonces n+1..n+3
 * are hashed in their lanes but never checked.
 *
 * Returns the number of shares found; *hashes_done receives
 * n - first_nonce + 1.
 */
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done)
{
   uint32_t vdata[45*4] __attribute__ ((aligned (64)));
   uint32_t hash[4*8] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
   uint32_t n = first_nonce;
   const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
   uint32_t *nonces = work->nonces;
   bool *found = work->nfound;
   int num_found = 0;

   ctx_midstate_done = false;

   m128_interleave_4x32( vdata, pdata, pdata, pdata, pdata, 180*8 );

   // With 4x32 interleaving the four lanes of one word are adjacent, so
   // the nonce slots are noncep[0..3]. A stride of 2 would overwrite the
   // following header word in lanes 0 and 2.
   uint32_t *noncep = vdata + DECRED_NONCE_INDEX * 4;

   do {
      found[0] = found[1] = found[2] = found[3] = false;
      noncep[0] = n;
      noncep[1] = n+1;
      noncep[2] = n+2;
      noncep[3] = n+3;

      decred_hash_4way( hash, vdata );

      if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
      {
          work_set_target_ratio( work, hash );
          found[0] = true;
          num_found++;
          nonces[0] = n;
          pdata[DECRED_NONCE_INDEX] = n;
      }

      n += 4;
   } while ( (num_found == 0) && (n < max_nonce)
             && !work_restart[thr_id].restart );

   *hashes_done = n - first_nonce + 1;
   return num_found;
}
#endif

176
algo/blake/decred-gate.c Normal file
View File

@@ -0,0 +1,176 @@
#include "decred-gate.h"
#include <unistd.h>
#include <memory.h>
#include <string.h>
// Returns the address of the nonce word (index DECRED_NONCE_INDEX) inside
// the given work data array.
uint32_t *decred_get_nonceptr( uint32_t *work_data )
{
   uint32_t *nonce_slot = work_data + DECRED_NONCE_INDEX;
   return nonce_slot;
}
/*
 * Compute the network difficulty from work->data[DECRED_NBITS_INDEX].
 *
 * The low 24 bits of the word are used as `bits`, and the low byte of the
 * byte-swapped word as `shift`. The result is 0xffff / bits, scaled by 256
 * for every step that shift is below 29 (or divided for every step above),
 * with one extra factor of 256 when shift == 28.
 *
 * Returns the computed difficulty. It must not return the global net_diff:
 * that would hand the caller the stale previous value and discard the
 * result calculated here.
 */
double decred_calc_network_diff( struct work* work )
{
   // sample for diff 43.281 : 1c05ea29
   // todo: endian reversed on longpoll could be zr5 specific...
   uint32_t nbits = work->data[ DECRED_NBITS_INDEX ];
   uint32_t bits = ( nbits & 0xffffff );
   int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
   int m;
   double d = (double)0x0000ffff / (double)bits;

   for ( m = shift; m < 29; m++ )
       d *= 256.0;
   for ( m = 29; m < shift; m++ )
       d /= 256.0;
   if ( shift == 28 )
       d *= 256.0; // testnet
   if ( opt_debug_diff )
       applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d,
                          shift, bits );
   return d;
}
// Stamp random extradata into the work, record its block height and, when a
// new block is seen without longpoll, log it.
//
// work:       work unit; data[ DECRED_XNONCE_INDEX ] and height are written.
// net_blocks: in/out block counter, set to height - 1 when a new block is
//             reported.
void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
{
   // some random extradata to make the work unique
   // Multiply as unsigned: rand()*4 can overflow a signed int, which is
   // undefined behaviour. The wrapped 32-bit result is what gets stored.
   work->data[ DECRED_XNONCE_INDEX ] = (uint32_t)rand() * 4u;
   work->height = work->data[32];

   if ( !have_longpoll && work->height > *net_blocks + 1 )
   {
      char netinfo[64] = { 0 };

      if ( opt_showdiff && net_diff > 0. )
      {
         // Bounded formatting: two large doubles can exceed 64 characters.
         if ( net_diff != work->targetdiff )
            snprintf( netinfo, sizeof netinfo, ", diff %.3f, target %.1f",
                      net_diff, work->targetdiff );
         else
            snprintf( netinfo, sizeof netinfo, ", diff %.3f", net_diff );
      }
      applog( LOG_BLUE, "%s block %d%s", algo_names[opt_algo], work->height,
              netinfo );
      *net_blocks = work->height - 1;
   }
}
// Format a stratum "mining.submit" request for a Decred share into req.
//
// req:  output buffer; at most JSON_BUF_LEN bytes are written.
// work: supplies job_id and the ntime, nonce and extranonce words of data[].
// sctx: supplies xnonce1_size, the number of extranonce bytes to hex-encode.
void decred_be_build_stratum_request( char *req, struct work *work,
                                      struct stratum_ctx *sctx )
{
   uint32_t be_ntime, be_nonce;
   char ntime_hex[9], nonce_hex[9];
   unsigned char *xnonce_hex;

   // Store both words big-endian, then hex-encode the 4 bytes of each.
   be32enc( &be_ntime, work->data[ DECRED_NTIME_INDEX ] );
   bin2hex( ntime_hex, (char*)&be_ntime, sizeof(uint32_t) );
   be32enc( &be_nonce, work->data[ DECRED_NONCE_INDEX ] );
   bin2hex( nonce_hex, (char*)&be_nonce, sizeof(uint32_t) );

   // abin2hex returns a buffer that is released with free() below.
   xnonce_hex = abin2hex( (char*)( &work->data[ DECRED_XNONCE_INDEX ] ),
                          sctx->xnonce1_size );

   snprintf( req, JSON_BUF_LEN,
        "{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
        rpc_user, work->job_id, xnonce_hex, ntime_hex, nonce_hex );

   free( xnonce_hex );
}
// Rebuild the Decred block header in g_work->data from the current stratum
// job. The job's coinbase buffer carries a 32-byte merkle root followed by
// header bytes, and its last 4 bytes are copied to data[44].
//
// g_work: work unit whose data[] is cleared and refilled.
// sctx:   stratum context. Side effects: job.xnonce2 is incremented and
//         bloc_height is set from data[32].
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
   uchar merkle_root[64] = { 0 };
   uint32_t extraheader[32] = { 0 };
   uint32_t* extradata = (uint32_t*) sctx->xnonce1;
   const size_t coinbase_size = sctx->job.coinbase_size;
   // Capacity of data[] in 32-bit words (data is taken to be an array member,
   // as the memset below already relies on); it bounds the copies into it.
   const size_t data_words = sizeof(g_work->data) / sizeof(g_work->data[0]);
   size_t headersize = 0;
   size_t t, i;

   // getwork over stratum, getwork merkle + header passed in coinb1
   // Never read past the end of a coinbase shorter than the merkle root.
   memcpy( merkle_root, sctx->job.coinbase,
           coinbase_size < 32 ? coinbase_size : 32 );

   // Header bytes follow the merkle root. The old min() macro compared a
   // possibly negative int against a size_t, so a short coinbase produced
   // headersize == sizeof(extraheader) and an out-of-bounds read.
   if ( coinbase_size > 32 )
   {
      headersize = coinbase_size - 32;
      if ( headersize > sizeof(extraheader) )
         headersize = sizeof(extraheader);
      memcpy( extraheader, &sctx->job.coinbase[32], headersize );
   }

   // Increment extranonce2
   for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );

   // Assemble block header
   memset( g_work->data, 0, sizeof(g_work->data) );
   g_work->data[0] = le32dec( sctx->job.version );
   for ( i = 0; i < 8; i++ )
   {
      g_work->data[1 + i] = swab32(
                      le32dec( (uint32_t *) sctx->job.prevhash + i ) );
      g_work->data[9 + i] = swab32(
                      be32dec( (uint32_t *) merkle_root + i ) );
   }

   // header words, clipped to the capacity of data[]
   for ( i = 0; i < headersize/4 && 17 + i < data_words; i++ )
      g_work->data[17 + i] = extraheader[i];

   // extradata, likewise clipped
   for ( i = 0; i < sctx->xnonce1_size/4
                && DECRED_XNONCE_INDEX + i < data_words; i++ )
      g_work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
   for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
      g_work->data[i] = 0;

   // Unsigned arithmetic: rand()*4 may overflow a signed int and shifting a
   // negative int left is undefined.
   g_work->data[37] = ( (uint32_t)rand() * 4u ) << 8;

   // block header suffix from coinb2 (stake version)
   if ( coinbase_size >= 4 )
      memcpy( &g_work->data[44],
              &sctx->job.coinbase[ coinbase_size - 4 ], 4 );

   sctx->bloc_height = g_work->data[32];
   //applog_hex(work->data, 180);
   //applog_hex(&work->data[36], 36);
}
// Decide whether a thread may start hashing this work unit.
//
// Returns false when running over stratum and either the work's job id no
// longer matches the stratum job, or data[0] is still zero outside benchmark
// mode (after sleeping one second). Otherwise bumps the extradata words and
// returns true.
bool decred_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
                           int thr_id )
{
   if ( have_stratum )
   {
      // need to regen g_work..
      if ( strcmp( stratum->job.job_id, work->job_id ) != 0 )
         return false;

      if ( work->data[0] == 0 && !opt_benchmark )
      {
         sleep(1);
         return false;
      }
   }

   // extradata: prevent duplicates
   work->data[ DECRED_XNONCE_INDEX     ] += 1;
   work->data[ DECRED_XNONCE_INDEX + 1 ] |= thr_id;
   return true;
}
// Install the Decred handlers and header layout constants into the algo gate.
// Also clears the allow_mininginfo and have_gbt globals. Always returns true.
bool register_decred_algo( algo_gate_t* gate )
{
   // Header layout.
   gate->nonce_index    = DECRED_NONCE_INDEX;
   gate->ntime_index    = DECRED_NTIME_INDEX;
   gate->nbits_index    = DECRED_NBITS_INDEX;
   gate->work_cmp_size  = DECRED_WORK_COMPARE_SIZE;
   gate->work_data_size = DECRED_DATA_SIZE;

   // Hashing entry points: 4-way when DECRED_4WAY is defined, scalar otherwise.
#if defined(DECRED_4WAY)
   gate->hash          = (void*)&decred_hash_4way;
   gate->scanhash      = (void*)&scanhash_decred_4way;
   gate->optimizations = SSE2_OPT | AVX_OPT;
#else
   gate->hash          = (void*)&decred_hash;
   gate->scanhash      = (void*)&scanhash_decred;
   gate->optimizations = SSE2_OPT;
#endif

   // Work handling hooks.
   gate->ready_to_mine         = (void*)&decred_ready_to_mine;
   gate->build_extraheader     = (void*)&decred_build_extraheader;
   gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
   gate->work_decode           = (void*)&std_be_work_decode;
   gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
   gate->display_extra_data    = (void*)&decred_decode_extradata;
   gate->get_max64             = (void*)&get_max64_0x3fffffLL;
   gate->get_nonceptr          = (void*)&decred_get_nonceptr;

   have_gbt = false;
   allow_mininginfo = false;
   return true;
}

36
algo/blake/decred-gate.h Normal file
View File

@@ -0,0 +1,36 @@
// Decred algo gate: header layout constants and hashing entry points.
#ifndef __DECRED_GATE_H__
#define __DECRED_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
// Indices of 32-bit words inside work->data[].
// nBits word read by decred_calc_network_diff.
#define DECRED_NBITS_INDEX 29
// Time word sent as "ntime" in the stratum submit request.
#define DECRED_NTIME_INDEX 34
// Nonce word iterated by the scanhash functions.
#define DECRED_NONCE_INDEX 35
// First extranonce / extradata word.
#define DECRED_XNONCE_INDEX 36
// Values installed as gate->work_data_size and gate->work_cmp_size
// (presumably byte counts -- confirm against algo-gate-api).
#define DECRED_DATA_SIZE 192
#define DECRED_WORK_COMPARE_SIZE 140
// Number of leading input bytes that decred_hash hashes once into its cached
// midstate; the remaining (180 - DECRED_MIDSTATE_LEN) bytes are hashed per call.
#define DECRED_MIDSTATE_LEN 128
// Placeholder for an 8-way implementation; nothing is declared yet.
#if defined (__AVX2__)
//void blakehash_84way(void *state, const void *input);
//int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done );
#endif
// The 4-way path is compiled only when the build requests FOUR_WAY and the
// compiler targets AVX.
#if defined(FOUR_WAY) && defined(__AVX__)
#define DECRED_4WAY
#endif
#if defined (DECRED_4WAY)
// 4-way variants, registered by register_decred_algo when DECRED_4WAY is set.
void decred_hash_4way(void *state, const void *input);
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
// Scalar variants, always declared.
void decred_hash( void *state, const void *input );
int scanhash_decred( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif

View File

@@ -1,5 +1,4 @@
#include "miner.h"
#include "algo-gate-api.h"
#include "decred-gate.h"
#include "sph_blake.h"
#include <string.h>
@@ -15,33 +14,33 @@
#define max(a,b) (a<b ? b : a)
#endif
*/
/*
#define DECRED_NBITS_INDEX 29
#define DECRED_NTIME_INDEX 34
#define DECRED_NONCE_INDEX 35
#define DECRED_XNONCE_INDEX 36
#define DECRED_DATA_SIZE 192
#define DECRED_WORK_COMPARE_SIZE 140
*/
static __thread sph_blake256_context blake_mid;
static __thread bool ctx_midstate_done = false;
void decred_hash(void *state, const void *input)
{
#define MIDSTATE_LEN 128
// #define MIDSTATE_LEN 128
sph_blake256_context ctx __attribute__ ((aligned (64)));
uint8_t *ending = (uint8_t*) input;
ending += MIDSTATE_LEN;
ending += DECRED_MIDSTATE_LEN;
if (!ctx_midstate_done) {
sph_blake256_init(&blake_mid);
sph_blake256(&blake_mid, input, MIDSTATE_LEN);
sph_blake256(&blake_mid, input, DECRED_MIDSTATE_LEN);
ctx_midstate_done = true;
}
memcpy(&ctx, &blake_mid, sizeof(blake_mid));
sph_blake256(&ctx, ending, (180 - MIDSTATE_LEN));
sph_blake256(&ctx, ending, (180 - DECRED_MIDSTATE_LEN));
sph_blake256_close(&ctx, state);
}
@@ -60,9 +59,9 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
#define DCR_NONCE_OFT32 35
// #define DCR_NONCE_OFT32 35
const uint32_t first_nonce = pdata[DCR_NONCE_OFT32];
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
uint32_t n = first_nonce;
@@ -82,7 +81,7 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
do {
//be32enc(&endiandata[DCR_NONCE_OFT32], n);
endiandata[DCR_NONCE_OFT32] = n;
endiandata[DECRED_NONCE_INDEX] = n;
decred_hash(hash32, endiandata);
if (hash32[7] <= HTarget && fulltest(hash32, ptarget)) {
@@ -93,7 +92,7 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
applog_hash(ptarget);
applog_compare_hash(hash32, ptarget);
#endif
pdata[DCR_NONCE_OFT32] = n;
pdata[DECRED_NONCE_INDEX] = n;
return 1;
}
@@ -102,24 +101,17 @@ int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
pdata[DCR_NONCE_OFT32] = n;
pdata[DECRED_NONCE_INDEX] = n;
return 0;
}
/*
uint32_t *decred_get_nonceptr( uint32_t *work_data )
{
return &work_data[ DECRED_NONCE_INDEX ];
}
// does decred need a custom stratum_get_g_work to fix nicehash
// bad extranonce2 size?
//
// does decred need a custom init_nonce?
// does it need to increment nonce, seems not because gen_work_now always
// returns true
double decred_calc_network_diff( struct work* work )
//void decred_calc_network_diff( struct work* work )
{
// sample for diff 43.281 : 1c05ea29
// todo: endian reversed on longpoll could be zr5 specific...
@@ -181,7 +173,7 @@ void decred_be_build_stratum_request( char *req, struct work *work,
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
free(xnonce2str);
}
*/
/*
// data shared between gen_merkle_root and build_extraheader.
__thread uint32_t decred_extraheader[32] = { 0 };
@@ -197,7 +189,7 @@ void decred_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
}
*/
/*
#define min(a,b) (a>b ? (b) :(a))
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
@@ -235,11 +227,15 @@ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
for ( i = 0; i < headersize/4; i++ ) // header
g_work->data[17 + i] = extraheader[i];
// extradata
for ( i = 0; i < sctx->xnonce1_size/4; i++ )
g_work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
g_work->data[i] = 0;
g_work->data[37] = (rand()*4) << 8;
// block header suffix from coinb2 (stake version)
memcpy( &g_work->data[44],
&sctx->job.coinbase[ sctx->job.coinbase_size-4 ], 4 );
sctx->bloc_height = g_work->data[32];
//applog_hex(work->data, 180);
//applog_hex(&work->data[36], 36);
@@ -274,6 +270,8 @@ bool register_decred_algo( algo_gate_t* gate )
gate->get_max64 = (void*)&get_max64_0x3fffffLL;
gate->display_extra_data = (void*)&decred_decode_extradata;
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->build_extraheader = (void*)&decred_build_extraheader;
gate->ready_to_mine = (void*)&decred_ready_to_mine;
gate->nbits_index = DECRED_NBITS_INDEX;
@@ -285,4 +283,4 @@ bool register_decred_algo( algo_gate_t* gate )
have_gbt = false;
return true;
}
*/

View File

@@ -0,0 +1,206 @@
#include "pentablake-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake-hash-4way.h"
#include "sph_blake.h"
//#define DEBUG_ALGO
#ifdef PENTABLAKE_4WAY
// 4-way pentablake: five chained blake512 passes over four interleaved lanes.
//
// output: receives 4 x 32 bytes; lane k's result starts at byte offset 32*k.
// input:  four 80-byte inputs in the 4x64 interleaved layout (presumably as
//         produced by m256_interleave_4x64 -- confirm against caller).
extern void pentablakehash_4way( void *output, const void *input )
{
   uint64_t hash0[8] __attribute__ ((aligned (64)));
   uint64_t hash1[8] __attribute__ ((aligned (64)));
   uint64_t hash2[8] __attribute__ ((aligned (64)));
   uint64_t hash3[8] __attribute__ ((aligned (64)));
   uint64_t vhash[8*4] __attribute__ ((aligned (64)));
   // Scalar reference digest for lane 0. Held as uint64_t so it can be
   // compared word by word without casting a byte buffer.
   uint64_t ref_hash[16] __attribute__ ((aligned (64)));
   uint64_t sin0[10] __attribute__ ((aligned (64)));
   uint64_t sin1[10] __attribute__ ((aligned (64)));
   uint64_t sin2[10] __attribute__ ((aligned (64)));
   uint64_t sin3[10] __attribute__ ((aligned (64)));
   uint8_t *out = (uint8_t*)output;  // byte pointer: no arithmetic on void*
   blake512_4way_context ctx;
   sph_blake512_context ctx2_blake;

   // Pass 1: hash the 80-byte input of every lane.
   blake512_4way_init( &ctx );
   blake512_4way( &ctx, input, 80 );
   blake512_4way_close( &ctx, vhash );

   // NOTE(review): diagnostic cross-check kept from the original. Lane 0 is
   // rehashed with the scalar sph implementation and any differing 64-bit
   // word is reported. It runs on every call; consider compiling it out once
   // the 4-way code is trusted.
   m256_deinterleave_4x64( sin0, sin1, sin2, sin3, input, 640 );
   sph_blake512_init( &ctx2_blake );
   sph_blake512( &ctx2_blake, sin0, 80 );
   sph_blake512_close( &ctx2_blake, (void*) ref_hash );

   m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
   for ( unsigned i = 0; i < 8; i++ )
   {
      if ( hash0[i] != ref_hash[i] )
         printf("hash mismatch %u\n",i);
   }

   // Passes 2-5: rehash the 64-byte digest of the previous pass in place.
   for ( int pass = 0; pass < 4; pass++ )
   {
      blake512_4way_init( &ctx );
      blake512_4way( &ctx, vhash, 64 );
      blake512_4way_close( &ctx, vhash );
   }

   // Split the lanes and emit the first 32 bytes of each digest.
   m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
   memcpy( out,      hash0, 32 );
   memcpy( out + 32, hash1, 32 );
   memcpy( out + 64, hash2, 32 );
   memcpy( out + 96, hash3, 32 );
}
// Scan nonces four at a time with pentablakehash_4way.
//
// thr_id:      index into work_restart[], polled to abort the scan.
// work:        work unit; data[19] is the nonce word, target[7] selects the
//              pre-filter mask, and nfound[]/nonces[] receive per-lane results.
// max_nonce:   scanning stops once n reaches this value.
// hashes_done: set to n - first_nonce + 1 on every exit path.
//
// Returns 1 as soon as one lane passes fulltest (only that lane is
// reported), 0 otherwise.
int scanhash_pentablake_4way( int thr_id, struct work *work,
                              uint32_t max_nonce, uint64_t *hashes_done )
{
   // Coarse filter: for the first htmax[m] >= target[7], a hash is worth a
   // fulltest only when (hash[7] & masks[m]) == 0.
   static const uint64_t htmax[] = {
      0,
      0xF,
      0xFF,
      0xFFF,
      0xFFFF,
      0x10000000
   };
   static const uint32_t masks[] = {
      0xFFFFFFFF,
      0xFFFFFFF0,
      0xFFFFFF00,
      0xFFFFF000,
      0xFFFF0000,
      0
   };

   uint32_t hash[4*8] __attribute__ ((aligned (64)));
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
   uint32_t endiandata[32] __attribute__ ((aligned (64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   // NOTE(review): scanning starts one below data[19], as in the original.
   uint32_t n = pdata[19] - 1;
   const uint32_t first_nonce = pdata[19];
   const uint32_t Htarg = ptarget[7];
   uint32_t *nonces = work->nonces;
   bool *found = work->nfound;
   // Nonce slot of each lane inside the interleaved data: 9*8 + 1, then
   // every second word.
   uint32_t * const noncep[4] = { vdata + 73, vdata + 75,
                                  vdata + 77, vdata + 79 };

   // we need bigendian data...
   swab32_array( endiandata, pdata, 20 );

   uint64_t *edata = (uint64_t*)endiandata;
   m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );

   for ( int m = 0; m < 6; m++ )
   {
      if ( Htarg > htmax[m] )
         continue;

      const uint32_t mask = masks[m];
      do {
         for ( int lane = 0; lane < 4; lane++ )
         {
            found[lane] = false;
            be32enc( noncep[lane], n + lane );
         }

         pentablakehash_4way( hash, vdata );

         // return immediately on nonce found, only one submit
         for ( int lane = 0; lane < 4; lane++ )
         {
            uint32_t *lane_hash = hash + 8*lane;

            if ( ( lane_hash[7] & mask ) || !fulltest( lane_hash, ptarget ) )
               continue;

            found[lane] = true;
            // Lane k hashed nonce n + k. The previous code stored n for
            // every lane, so lanes 1-3 reported the wrong nonce.
            nonces[lane] = n + lane;
            if ( lane == 0 )
               pdata[19] = n;
            *hashes_done = n - first_nonce + 1;
            return 1;
         }
         n += 4;
      } while ( n < max_nonce && !work_restart[thr_id].restart );
      break;
   }

   *hashes_done = n - first_nonce + 1;
   pdata[19] = n;
   return 0;
}
#endif

View File

@@ -0,0 +1,16 @@
#include "pentablake-gate.h"
// Install the pentablake handlers into the algo gate: the 4-way functions
// when PENTABLAKE_4WAY is defined, the scalar ones otherwise.
// Always returns true.
bool register_pentablake_algo( algo_gate_t* gate )
{
   gate->get_max64 = (void*)&get_max64_0x3ffff;

#if defined (PENTABLAKE_4WAY)
   gate->hash          = (void*)&pentablakehash_4way;
   gate->scanhash      = (void*)&scanhash_pentablake_4way;
   gate->optimizations = SSE2_OPT | AVX2_OPT;
#else
   gate->hash     = (void*)&pentablakehash;
   gate->scanhash = (void*)&scanhash_pentablake;
#endif

   return true;
}

View File

@@ -0,0 +1,21 @@
// Pentablake algo gate: build-time selection of the 4-way path and the
// hashing entry points.
#ifndef __PENTABLAKE_GATE_H__
#define __PENTABLAKE_GATE_H__

#include "algo-gate-api.h"
#include <stdint.h>

// The 4-way implementation runs four 64-bit lanes through the 256-bit
// m256_* / blake512_4way helpers, and the gate advertises it as AVX2_OPT.
// It is therefore enabled only when the compiler targets AVX2. With AVX
// alone the build falls back to the scalar functions.
#if defined(FOUR_WAY) && defined(__AVX2__)
  #define PENTABLAKE_4WAY
#endif

#if defined(PENTABLAKE_4WAY)
// 4-way variants: input and output carry four interleaved lanes.
void pentablakehash_4way( void *state, const void *input );
int scanhash_pentablake_4way( int thr_id, struct work *work,
                              uint32_t max_nonce, uint64_t *hashes_done );
#endif

// Scalar variants, always declared.
void pentablakehash( void *state, const void *input );
int scanhash_pentablake( int thr_id, struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done );

#endif

View File

@@ -1,5 +1,4 @@
#include "miner.h"
#include "algo-gate-api.h"
#include "pentablake-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -111,11 +110,3 @@ int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce,
return 0;
}
bool register_pentablake_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_pentablake;
gate->hash = (void*)&pentablakehash;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};

View File

@@ -813,6 +813,7 @@ blake32(sph_blake_small_context *sc, const void *data, size_t len)
buf = sc->buf;
ptr = sc->ptr;
if (len < (sizeof sc->buf) - ptr) {
memcpy(buf + ptr, data, len);
ptr += len;
@@ -890,9 +891,9 @@ blake32_close(sph_blake_small_context *sc,
sph_enc32be_aligned(u.buf + 60, tl);
blake32(sc, u.buf, 64);
}
out = dst;
for (k = 0; k < out_size_w32; k ++)
sph_enc32be(out + (k << 2), sc->H[k]);
out = dst;
for (k = 0; k < out_size_w32; k ++)
sph_enc32be(out + (k << 2), sc->H[k]);
}
#if SPH_64
@@ -982,9 +983,11 @@ blake64_close(sph_blake_big_context *sc,
u.buf[111] |= 1;
sph_enc64be_aligned(u.buf + 112, th);
sph_enc64be_aligned(u.buf + 120, tl);
blake64(sc, u.buf + ptr, 128 - ptr);
} else {
memset(u.buf + ptr + 1, 0, 127 - ptr);
blake64(sc, u.buf + ptr, 128 - ptr);
sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
@@ -993,6 +996,7 @@ blake64_close(sph_blake_big_context *sc,
u.buf[111] = 1;
sph_enc64be_aligned(u.buf + 112, th);
sph_enc64be_aligned(u.buf + 120, tl);
blake64(sc, u.buf, 128);
}
out = dst;

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>

View File

@@ -2,7 +2,6 @@
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include "miner.h"
#include "algo-gate-api.h"
#if defined(__arm__) || defined(_MSC_VER)

View File

@@ -109,43 +109,43 @@ static __thread cryptonight_ctx ctx;
void cryptonight_hash_aes( void *restrict output, const void *input, int len )
{
#ifndef NO_AES_NI
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
uint8_t ExpandedKey[256] __attribute__((aligned(64)));
__m128i *longoutput, *expkey, *xmminput;
size_t i, j;
memcpy(ctx.text, ctx.state.init, INIT_SIZE_BYTE);
memcpy(ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE);
ExpandAESKey256(ExpandedKey);
keccak( (const uint8_t*)input, 76, (char*)&ctx.state.hs.b, 200 );
memcpy( ExpandedKey, ctx.state.hs.b, AES_KEY_SIZE );
ExpandAESKey256( ExpandedKey );
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
__m128i *longoutput, *expkey, *xmminput;
longoutput = (__m128i *)ctx.long_state;
expkey = (__m128i *)ExpandedKey;
xmminput = (__m128i *)ctx.text;
longoutput = (__m128i*)ctx.long_state;
xmminput = (__m128i*)ctx.text;
expkey = (__m128i*)ExpandedKey;
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
// aesni_parallel_noxor(&ctx->long_state[i], ctx->text, ExpandedKey);
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
// prefetch expkey, xmminput and enough longoutput for 4 iterations
_mm_prefetch( xmminput, _MM_HINT_T0 );
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
for ( i = 0; i < 64; i += 16 )
{
_mm_prefetch( longoutput + i, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 4, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 8, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 12, _MM_HINT_T0 );
}
_mm_prefetch( expkey, _MM_HINT_T0 );
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
for ( i = 0; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
for ( i = 0; i < 64; i += 16 )
{
// prefetch 4 loops ahead,
__builtin_prefetch( longoutput + i, 1, 0 );
__builtin_prefetch( longoutput + i + 4, 1, 0 );
__builtin_prefetch( longoutput + i + 8, 1, 0 );
__builtin_prefetch( longoutput + i + 12, 1, 0 );
}
// n-4 iterations
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
i += INIT_SIZE_M128I )
{
// prefetch 4 iterations ahead.
__builtin_prefetch( longoutput + i + 64, 1, 0 );
__builtin_prefetch( longoutput + i + 68, 1, 0 );
for (j = 0; j < 10; j++ )
for ( j = 0; j < 10; j++ )
{
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
@@ -165,84 +165,99 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
}
// last 4 iterations
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
{
for ( j = 0; j < 10; j++ )
{
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
}
_mm_store_si128( &( longoutput[i ] ), xmminput[0] );
_mm_store_si128( &( longoutput[i+1] ), xmminput[1] );
_mm_store_si128( &( longoutput[i+2] ), xmminput[2] );
_mm_store_si128( &( longoutput[i+3] ), xmminput[3] );
_mm_store_si128( &( longoutput[i+4] ), xmminput[4] );
_mm_store_si128( &( longoutput[i+5] ), xmminput[5] );
_mm_store_si128( &( longoutput[i+6] ), xmminput[6] );
_mm_store_si128( &( longoutput[i+7] ), xmminput[7] );
}
// cast_m128i( ctx.a ) = _mm_xor_si128( casti_m128i( ctx.state.k, 0 ) ,
// casti_m128i( ctx.state.k, 2 ) );
// cast_m128i( ctx.b ) = _mm_xor_si128( casti_m128i( ctx.state.k, 1 ),
// casti_m128i( ctx.state.k, 3 ) );
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
ctx.a[0] = ((uint64_t *)ctx.state.k)[0] ^ ((uint64_t *)ctx.state.k)[4];
ctx.b[0] = ((uint64_t *)ctx.state.k)[2] ^ ((uint64_t *)ctx.state.k)[6];
ctx.a[1] = ((uint64_t *)ctx.state.k)[1] ^ ((uint64_t *)ctx.state.k)[5];
ctx.b[1] = ((uint64_t *)ctx.state.k)[3] ^ ((uint64_t *)ctx.state.k)[7];
// for (i = 0; i < 2; i++)
// {
// ctx.a[i] = ((uint64_t *)ctx.state.k)[i] ^ ((uint64_t *)ctx.state.k)[i+4];
// ctx.b[i] = ((uint64_t *)ctx.state.k)[i+2] ^ ((uint64_t *)ctx.state.k)[i+6];
// }
__m128i b_x = _mm_load_si128((__m128i *)ctx.b);
uint64_t a[2] __attribute((aligned(16))), b[2] __attribute((aligned(16)));
uint64_t a[2] __attribute((aligned(16))),
b[2] __attribute((aligned(16))),
c[2] __attribute((aligned(16)));
a[0] = ctx.a[0];
a[1] = ctx.a[1];
for(i = 0; __builtin_expect(i < 0x80000, 1); i++)
__m128i b_x = _mm_load_si128( (__m128i*)ctx.b );
__m128i a_x = _mm_load_si128( (__m128i*)a );
__m128i* lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
__m128i c_x = _mm_load_si128( lsa );
uint64_t *nextblock;
uint64_t hi, lo;
// n-1 iterations
for( i = 0; __builtin_expect( i < 0x7ffff, 1 ); i++ )
{
uint64_t c[2];
__builtin_prefetch( &ctx.long_state[c[0] & 0x1FFFF0], 0, 1 );
__m128i c_x = _mm_load_si128(
(__m128i *)&ctx.long_state[a[0] & 0x1FFFF0]);
__m128i a_x = _mm_load_si128((__m128i *)a);
c_x = _mm_aesenc_si128(c_x, a_x);
_mm_store_si128((__m128i *)c, c_x);
b_x = _mm_xor_si128(b_x, c_x);
_mm_store_si128((__m128i *)&ctx.long_state[a[0] & 0x1FFFF0], b_x);
uint64_t *nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
// uint64_t b[2];
c_x = _mm_aesenc_si128( c_x, a_x );
_mm_store_si128( (__m128i*)c, c_x );
b_x = _mm_xor_si128( b_x, c_x );
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
_mm_store_si128( lsa, b_x );
b[0] = nextblock[0];
b[1] = nextblock[1];
{
uint64_t hi, lo;
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
// hi,lo = 64bit x 64bit multiply of c[0] and b[0]
__asm__( "mulq %3\n\t"
: "=d" ( hi ),
"=a" ( lo )
: "%a" ( c[0] ),
"rm" ( b[0] )
: "cc" );
__asm__("mulq %3\n\t"
: "=d" (hi),
"=a" (lo)
: "%a" (c[0]),
"rm" (b[0])
: "cc" );
a[0] += hi;
a[1] += lo;
}
uint64_t *dst = (uint64_t*)&ctx.long_state[c[0] & 0x1FFFF0];
// __m128i *dst = (__m128i*)&ctx.long_state[c[0] & 0x1FFFF0];
// *dst = cast_m128i( a );
dst[0] = a[0];
dst[1] = a[1];
// cast_m128i( a ) = _mm_xor_si128( cast_m128i( a ), cast_m128i( b ) );
a[0] ^= b[0];
a[1] ^= b[1];
b_x = c_x;
__builtin_prefetch( &ctx.long_state[a[0] & 0x1FFFF0], 0, 3 );
b_x = c_x;
nextblock[0] = a[0] + hi;
nextblock[1] = a[1] + lo;
a[0] = b[0] ^ nextblock[0];
a[1] = b[1] ^ nextblock[1];
lsa = (__m128i*)&ctx.long_state[ a[0] & 0x1FFFF0 ];
a_x = _mm_load_si128( (__m128i*)a );
c_x = _mm_load_si128( lsa );
}
// abbreviated nth iteration
c_x = _mm_aesenc_si128( c_x, a_x );
_mm_store_si128( (__m128i*)c, c_x );
b_x = _mm_xor_si128( b_x, c_x );
nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
_mm_store_si128( lsa, b_x );
b[0] = nextblock[0];
b[1] = nextblock[1];
__asm__( "mulq %3\n\t"
: "=d" ( hi ),
"=a" ( lo )
: "%a" ( c[0] ),
"rm" ( b[0] )
: "cc" );
nextblock[0] = a[0] + hi;
nextblock[1] = a[1] + lo;
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
memcpy( ExpandedKey, &ctx.state.hs.b[32], AES_KEY_SIZE );
ExpandAESKey256( ExpandedKey );
//for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE)
// aesni_parallel_xor(&ctx->text, ExpandedKey, &ctx->long_state[i]);
memcpy( ctx.text, ctx.state.init, INIT_SIZE_BYTE );
// prefetch expkey, all of xmminput and enough longoutput for 4 loops
_mm_prefetch( xmminput, _MM_HINT_T0 );
_mm_prefetch( xmminput + 4, _MM_HINT_T0 );
for ( i = 0; i < 64; i += 16 )
@@ -256,9 +271,11 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
_mm_prefetch( expkey + 4, _MM_HINT_T0 );
_mm_prefetch( expkey + 8, _MM_HINT_T0 );
for ( i = 0; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
// n-4 iterations
for ( i = 0; likely( i < MEMORY_M128I - 4*INIT_SIZE_M128I );
i += INIT_SIZE_M128I )
{
// stay 4 loops ahead,
// stay 4 iterations ahead.
_mm_prefetch( longoutput + i + 64, _MM_HINT_T0 );
_mm_prefetch( longoutput + i + 68, _MM_HINT_T0 );
@@ -283,10 +300,34 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
}
}
// last 4 iterations
for ( ; likely( i < MEMORY_M128I ); i += INIT_SIZE_M128I )
{
xmminput[0] = _mm_xor_si128( longoutput[i ], xmminput[0] );
xmminput[1] = _mm_xor_si128( longoutput[i+1], xmminput[1] );
xmminput[2] = _mm_xor_si128( longoutput[i+2], xmminput[2] );
xmminput[3] = _mm_xor_si128( longoutput[i+3], xmminput[3] );
xmminput[4] = _mm_xor_si128( longoutput[i+4], xmminput[4] );
xmminput[5] = _mm_xor_si128( longoutput[i+5], xmminput[5] );
xmminput[6] = _mm_xor_si128( longoutput[i+6], xmminput[6] );
xmminput[7] = _mm_xor_si128( longoutput[i+7], xmminput[7] );
for( j = 0; j < 10; j++ )
{
xmminput[0] = _mm_aesenc_si128( xmminput[0], expkey[j] );
xmminput[1] = _mm_aesenc_si128( xmminput[1], expkey[j] );
xmminput[2] = _mm_aesenc_si128( xmminput[2], expkey[j] );
xmminput[3] = _mm_aesenc_si128( xmminput[3], expkey[j] );
xmminput[4] = _mm_aesenc_si128( xmminput[4], expkey[j] );
xmminput[5] = _mm_aesenc_si128( xmminput[5], expkey[j] );
xmminput[6] = _mm_aesenc_si128( xmminput[6], expkey[j] );
xmminput[7] = _mm_aesenc_si128( xmminput[7], expkey[j] );
}
}
memcpy( ctx.state.init, ctx.text, INIT_SIZE_BYTE);
keccakf( (uint64_t*)&ctx.state.hs.w, 24 );
extra_hashes[ctx.state.hs.b[0] & 3](&ctx.state, 200, output);
#endif
}

View File

@@ -5,7 +5,6 @@
// Modified for CPUminer by Lucas Jones
#include "cpuminer-config.h"
//#include "miner.h"
#include "algo-gate-api.h"
#ifndef NO_AES_NI

View File

@@ -32,7 +32,6 @@
#define POK_BOOL_MASK 0x00008000
#define POK_DATA_MASK 0xFFFF0000
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>
@@ -248,7 +247,9 @@ bool register_drop_algo( algo_gate_t* gate )
gate->get_new_work = (void*)&drop_get_new_work;
gate->set_target = (void*)&scrypt_set_target;
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
gate->set_work_data_endian = (void*)&swab_work_data;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->set_work_data_endian = (void*)&set_work_data_big_endian;
gate->display_extra_data = (void*)&drop_display_pok;
gate->work_data_size = 80;
gate->work_cmp_size = 72;

View File

@@ -53,11 +53,12 @@ extern const unsigned int _k_aesmix4[];
x = _mm_shuffle_epi8(*((__m128i*)table + 0), x);\
x = _mm_xor_si128(x, t1)
#if 0
// compiled erroneously with 32-bit msc compiler
//t2 = _mm_shuffle_epi8(table[0], x);\
//x = _mm_shuffle_epi8(table[1], t1);\
//x = _mm_xor_si128(x, t2)
t2 = _mm_shuffle_epi8(table[0], x);\
x = _mm_shuffle_epi8(table[1], t1);\
x = _mm_xor_si128(x, t2)
#endif
// input: x
// output: t2, t3

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdlib.h>

View File

@@ -21,7 +21,7 @@
#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"
#include "algo/sha/brg_types.h"
/* some sizes (number of bytes) */
#define ROWS (8)

View File

@@ -35,7 +35,7 @@ typedef crypto_uint64 u64;
#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"
#include "algo/sha/brg_types.h"
#ifdef IACA_TRACE
#include IACA_MARKS

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdio.h>
@@ -99,22 +98,21 @@ void groestl_set_target( struct work* work, double job_diff )
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
bool register_groestl_algo( algo_gate_t* gate )
bool register_dmd_gr_algo( algo_gate_t* gate )
{
init_groestl_ctx();
gate->optimizations = SSE2_OPT | AES_OPT;
gate->scanhash = (void*)&scanhash_groestl;
gate->hash = (void*)&groestlhash;
gate->set_target = (void*)&groestl_set_target;
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
gate->get_max64 = (void*)&get_max64_0x3ffff;
return true;
};
bool register_dmd_gr_algo( algo_gate_t* gate )
bool register_groestl_algo( algo_gate_t* gate )
{
register_groestl_algo( gate );
gate->gen_merkle_root = (void*)&sha256d_gen_merkle_root;
register_dmd_gr_algo( gate );
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
return true;
};

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdio.h>
@@ -12,11 +11,8 @@
#include "aes_ni/hash-groestl.h"
#endif
#if defined __SHA__
#include <openssl/sha.h>
#else
#include "algo/sha/sph_sha2.h"
#endif
#include <openssl/sha.h>
#include "algo/sha/sph_sha2.h"
typedef struct {
#ifdef NO_AES_NI
@@ -24,7 +20,7 @@ typedef struct {
#else
hashState_groestl groestl;
#endif
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_CTX sha;
#else
sph_sha256_context sha;
@@ -40,7 +36,7 @@ void init_myrgr_ctx()
#else
init_groestl (&myrgr_ctx.groestl, 64 );
#endif
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_Init( &myrgr_ctx.sha );
#else
sph_sha256_init( &myrgr_ctx.sha );
@@ -61,7 +57,7 @@ void myriadhash( void *output, const void *input )
(const char*)input, 640 );
#endif
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_Update( &ctx.sha, hash, 64 );
SHA256_Final( (unsigned char*) hash, &ctx.sha );
#else
@@ -108,7 +104,7 @@ int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
bool register_myriad_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | SHA_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
init_myrgr_ctx();
gate->scanhash = (void*)&scanhash_myriad;
gate->hash = (void*)&myriadhash;

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdio.h>

View File

@@ -2,7 +2,6 @@
#include <openssl/sha.h>
#include <stdint.h>
#include "miner.h"
#include "algo-gate-api.h"
#include "sph_hefty1.h"
#include "algo/keccak/sph_keccak.h"

View File

@@ -1,16 +1,12 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>
#include <stdint.h>
#include "algo/blake/sph_blake.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/shavite/sph_shavite.h"
@@ -22,12 +18,11 @@
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/sha/sph_sha2.h"
#include "algo/haval/sph-haval.h"
#include <openssl/sha.h>
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#endif
#include "algo/luffa/sse2/luffa_for_sse2.h"
#include "algo/cubehash/sse2/cubehash_sse2.h"
#include "algo/simd/sse2/nist.h"
@@ -47,7 +42,11 @@ typedef struct {
sph_fugue512_context fugue1, fugue2;
sph_shabal512_context shabal1;
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
#ifndef USE_SPH_SHA
SHA512_CTX sha1, sha2;
#else
sph_sha512_context sha1, sha2;
#endif
sph_haval256_5_context haval1, haval2;
#ifdef NO_AES_NI
sph_groestl512_context groestl1, groestl2;
@@ -102,9 +101,13 @@ void init_hmq1725_ctx()
sph_whirlpool_init(&hmq1725_ctx.whirlpool3);
sph_whirlpool_init(&hmq1725_ctx.whirlpool4);
#ifndef USE_SPH_SHA
SHA512_Init( &hmq1725_ctx.sha1 );
SHA512_Init( &hmq1725_ctx.sha2 );
#else
sph_sha512_init(&hmq1725_ctx.sha1);
sph_sha512_init(&hmq1725_ctx.sha2);
#endif
sph_haval256_5_init(&hmq1725_ctx.haval1);
sph_haval256_5_init(&hmq1725_ctx.haval2);
@@ -271,8 +274,13 @@ extern void hmq1725hash(void *state, const void *input)
}
else
{
#ifndef USE_SPH_SHA
SHA512_Update( &h_ctx.sha1, hashB, 64 );
SHA512_Final( (unsigned char*) hashA, &h_ctx.sha1 );
#else
sph_sha512 (&h_ctx.sha1, hashB, 64); //7
sph_sha512_close(&h_ctx.sha1, hashA); //8
#endif
}
#ifdef NO_AES_NI
@@ -283,8 +291,13 @@ extern void hmq1725hash(void *state, const void *input)
(const char*)hashA, 512 );
#endif
#ifndef USE_SPH_SHA
SHA512_Update( &h_ctx.sha2, hashB, 64 );
SHA512_Final( (unsigned char*) hashA, &h_ctx.sha2 );
#else
sph_sha512 (&h_ctx.sha2, hashB, 64); //2
sph_sha512_close(&h_ctx.sha2, hashA); //3
#endif
if ( hashA[0] & mask ) //4
{

View File

@@ -1,10 +1,7 @@
#include <memory.h>
#include <stdlib.h>
#include "miner.h"
//#include "algo-gate-api.h"
#include "hodl-gate.h"
//#include "hodl.h"
#include "hodl-wolf.h"
#define HODL_NSTARTLOC_INDEX 20
@@ -97,13 +94,7 @@ bool hodl_do_this_thread( int thr_id )
int hodl_scanhash( int thr_id, struct work* work, uint32_t max_nonce,
uint64_t *hashes_done )
{
#ifdef NO_AES_NI
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
return false;
// GetPsuedoRandomData( hodl_scratchbuf, work->data, thr_id );
// pthread_barrier_wait( &hodl_barrier );
// return scanhash_hodl( thr_id, work, max_nonce, hashes_done );
#else
#ifndef NO_AES_NI
GenRandomGarbage( hodl_scratchbuf, work->data, thr_id );
pthread_barrier_wait( &hodl_barrier );
return scanhash_hodl_wolf( thr_id, work, max_nonce, hashes_done );
@@ -112,6 +103,10 @@ int hodl_scanhash( int thr_id, struct work* work, uint32_t max_nonce,
bool register_hodl_algo( algo_gate_t* gate )
{
#ifdef NO_AES_NI
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
return false;
#endif
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->scanhash = (void*)&hodl_scanhash;

View File

@@ -4,6 +4,11 @@
//Dependencies
#include <string.h>
#include <stdlib.h>
#ifdef __FreeBSD__
#include <sys/endian.h>
#endif
#include "tmmintrin.h"
#include "smmintrin.h"

View File

@@ -3,6 +3,11 @@
//Dependencies
#include <string.h>
#include <stdlib.h>
#ifdef __FreeBSD__
#include <sys/endian.h>
#endif
#include "tmmintrin.h"
#include "smmintrin.h"
#include "immintrin.h"

639
algo/jh/jh-hash-4way.c Normal file
View File

@@ -0,0 +1,639 @@
/* $Id: jh.c 255 2011-06-07 19:50:20Z tp $ */
/*
* JH implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifdef __AVX2__
#include <stddef.h>
#include <string.h>
#include "jh-hash-4way.h"
#ifdef __cplusplus
extern "C"{
#endif
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_JH
#define SPH_SMALL_FOOTPRINT_JH 1
#endif
#if !defined SPH_JH_64 && SPH_64_TRUE
#define SPH_JH_64 1
#endif
#if !SPH_64
#undef SPH_JH_64
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
/*
* The internal bitslice representation may use either big-endian or
* little-endian (true bitslice operations do not care about the bit
* ordering, and the bit-swapping linear operations in JH happen to
* be invariant through endianness-swapping). The constants must be
* defined according to the chosen endianness; we use some
* byte-swapping macros for that.
*/
/*
 * Endianness-dependent helpers for the 64-bit constants below.
 * On little-endian targets C64e(x) yields the byte-reversed form of the
 * 64-bit literal x; otherwise it is x unchanged. dec64e_aligned / enc64e
 * alias the sph little- or big-endian 64-bit accessors accordingly.
 * Nothing is defined when SPH_64 is false.
 */
#if SPH_LITTLE_ENDIAN
#if SPH_64
#define C64e(x) ((SPH_C64(x) >> 56) \
| ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
| ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
| ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \
| ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \
| ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
| ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
| ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
#define dec64e_aligned sph_dec64le_aligned
#define enc64e sph_enc64le
#endif
#else
#if SPH_64
#define C64e(x) SPH_C64(x)
#define dec64e_aligned sph_dec64be_aligned
#define enc64e sph_enc64be
#endif
#endif
/*
 * Sb(x0, x1, x2, x3, c): AVX2 translation of the scalar Sb macro that is
 * retained in the comment following this definition. The statements are
 * the same AND/XOR/OR/NOT sequence, applied to __m256i operands.
 *
 *  - x0..x3 are modified in place and must be assignable __m256i lvalues.
 *  - c is a 64-bit scalar; it is replicated into all four 64-bit elements.
 *  - A __m256i variable named `tmp` must be in scope at the expansion site
 *    (DECL_STATE provides it).
 *  - mm256_bitnot() is defined elsewhere; it stands in for the scalar `~`.
 */
#define Sb(x0, x1, x2, x3, c) \
do { \
__m256i cc = _mm256_set_epi64x( c, c, c, c ); \
x3 = mm256_bitnot( x3 ); \
x0 = _mm256_xor_si256( x0, _mm256_and_si256( cc, mm256_bitnot( x2 ) ) ); \
tmp = _mm256_xor_si256( cc, _mm256_and_si256( x0, x1 ) ); \
x0 = _mm256_xor_si256( x0, _mm256_and_si256( x2, x3 ) ); \
x3 = _mm256_xor_si256( x3, _mm256_and_si256( mm256_bitnot( x1 ), x2 ) ); \
x1 = _mm256_xor_si256( x1, _mm256_and_si256( x0, x2 ) ); \
x2 = _mm256_xor_si256( x2, _mm256_and_si256( x0, mm256_bitnot( x3 ) ) ); \
x0 = _mm256_xor_si256( x0, _mm256_or_si256( x1, x3 ) ); \
x3 = _mm256_xor_si256( x3, _mm256_and_si256( x1, x2 ) ); \
x1 = _mm256_xor_si256( x1, _mm256_and_si256( tmp, x0 ) ); \
x2 = _mm256_xor_si256( x2, tmp ); \
} while (0)
/*
#define Sb(x0, x1, x2, x3, c) do { \
x3 = ~x3; \
x0 ^= (c) & ~x2; \
tmp = (c) ^ (x0 & x1); \
x0 ^= x2 & x3; \
x3 ^= ~x1 & x2; \
x1 ^= x0 & x2; \
x2 ^= x0 & ~x3; \
x0 ^= x1 | x3; \
x3 ^= x1 & x2; \
x1 ^= tmp & x0; \
x2 ^= tmp; \
} while (0)
*/
/*
 * Lb(x0..x7): AVX2 translation of the scalar Lb macro retained in the
 * comment following this definition. All eight operands are __m256i
 * lvalues updated in place by XOR only; the statement order matters
 * because x4..x7 are updated first and then feed the updates of x0..x3.
 */
#define Lb(x0, x1, x2, x3, x4, x5, x6, x7) \
do { \
x4 = _mm256_xor_si256( x4, x1 ); \
x5 = _mm256_xor_si256( x5, x2 ); \
x6 = _mm256_xor_si256( x6, _mm256_xor_si256( x3, x0 ) ); \
x7 = _mm256_xor_si256( x7, x0 ); \
x0 = _mm256_xor_si256( x0, x5 ); \
x1 = _mm256_xor_si256( x1, x6 ); \
x2 = _mm256_xor_si256( x2, _mm256_xor_si256( x7, x4 ) ); \
x3 = _mm256_xor_si256( x3, x4 ); \
} while (0)
/*
#define Lb(x0, x1, x2, x3, x4, x5, x6, x7) do { \
x4 ^= x1; \
x5 ^= x2; \
x6 ^= x3 ^ x0; \
x7 ^= x0; \
x0 ^= x5; \
x1 ^= x6; \
x2 ^= x7 ^ x4; \
x3 ^= x4; \
} while (0)
*/
#if SPH_JH_64
static const sph_u64 C[] = {
C64e(0x72d5dea2df15f867), C64e(0x7b84150ab7231557),
C64e(0x81abd6904d5a87f6), C64e(0x4e9f4fc5c3d12b40),
C64e(0xea983ae05c45fa9c), C64e(0x03c5d29966b2999a),
C64e(0x660296b4f2bb538a), C64e(0xb556141a88dba231),
C64e(0x03a35a5c9a190edb), C64e(0x403fb20a87c14410),
C64e(0x1c051980849e951d), C64e(0x6f33ebad5ee7cddc),
C64e(0x10ba139202bf6b41), C64e(0xdc786515f7bb27d0),
C64e(0x0a2c813937aa7850), C64e(0x3f1abfd2410091d3),
C64e(0x422d5a0df6cc7e90), C64e(0xdd629f9c92c097ce),
C64e(0x185ca70bc72b44ac), C64e(0xd1df65d663c6fc23),
C64e(0x976e6c039ee0b81a), C64e(0x2105457e446ceca8),
C64e(0xeef103bb5d8e61fa), C64e(0xfd9697b294838197),
C64e(0x4a8e8537db03302f), C64e(0x2a678d2dfb9f6a95),
C64e(0x8afe7381f8b8696c), C64e(0x8ac77246c07f4214),
C64e(0xc5f4158fbdc75ec4), C64e(0x75446fa78f11bb80),
C64e(0x52de75b7aee488bc), C64e(0x82b8001e98a6a3f4),
C64e(0x8ef48f33a9a36315), C64e(0xaa5f5624d5b7f989),
C64e(0xb6f1ed207c5ae0fd), C64e(0x36cae95a06422c36),
C64e(0xce2935434efe983d), C64e(0x533af974739a4ba7),
C64e(0xd0f51f596f4e8186), C64e(0x0e9dad81afd85a9f),
C64e(0xa7050667ee34626a), C64e(0x8b0b28be6eb91727),
C64e(0x47740726c680103f), C64e(0xe0a07e6fc67e487b),
C64e(0x0d550aa54af8a4c0), C64e(0x91e3e79f978ef19e),
C64e(0x8676728150608dd4), C64e(0x7e9e5a41f3e5b062),
C64e(0xfc9f1fec4054207a), C64e(0xe3e41a00cef4c984),
C64e(0x4fd794f59dfa95d8), C64e(0x552e7e1124c354a5),
C64e(0x5bdf7228bdfe6e28), C64e(0x78f57fe20fa5c4b2),
C64e(0x05897cefee49d32e), C64e(0x447e9385eb28597f),
C64e(0x705f6937b324314a), C64e(0x5e8628f11dd6e465),
C64e(0xc71b770451b920e7), C64e(0x74fe43e823d4878a),
C64e(0x7d29e8a3927694f2), C64e(0xddcb7a099b30d9c1),
C64e(0x1d1b30fb5bdc1be0), C64e(0xda24494ff29c82bf),
C64e(0xa4e7ba31b470bfff), C64e(0x0d324405def8bc48),
C64e(0x3baefc3253bbd339), C64e(0x459fc3c1e0298ba0),
C64e(0xe5c905fdf7ae090f), C64e(0x947034124290f134),
C64e(0xa271b701e344ed95), C64e(0xe93b8e364f2f984a),
C64e(0x88401d63a06cf615), C64e(0x47c1444b8752afff),
C64e(0x7ebb4af1e20ac630), C64e(0x4670b6c5cc6e8ce6),
C64e(0xa4d5a456bd4fca00), C64e(0xda9d844bc83e18ae),
C64e(0x7357ce453064d1ad), C64e(0xe8a6ce68145c2567),
C64e(0xa3da8cf2cb0ee116), C64e(0x33e906589a94999a),
C64e(0x1f60b220c26f847b), C64e(0xd1ceac7fa0d18518),
C64e(0x32595ba18ddd19d3), C64e(0x509a1cc0aaa5b446),
C64e(0x9f3d6367e4046bba), C64e(0xf6ca19ab0b56ee7e),
C64e(0x1fb179eaa9282174), C64e(0xe9bdf7353b3651ee),
C64e(0x1d57ac5a7550d376), C64e(0x3a46c2fea37d7001),
C64e(0xf735c1af98a4d842), C64e(0x78edec209e6b6779),
C64e(0x41836315ea3adba8), C64e(0xfac33b4d32832c83),
C64e(0xa7403b1f1c2747f3), C64e(0x5940f034b72d769a),
C64e(0xe73e4e6cd2214ffd), C64e(0xb8fd8d39dc5759ef),
C64e(0x8d9b0c492b49ebda), C64e(0x5ba2d74968f3700d),
C64e(0x7d3baed07a8d5584), C64e(0xf5a5e9f0e4f88e65),
C64e(0xa0b8a2f436103b53), C64e(0x0ca8079e753eec5a),
C64e(0x9168949256e8884f), C64e(0x5bb05c55f8babc4c),
C64e(0xe3bb3b99f387947b), C64e(0x75daf4d6726b1c5d),
C64e(0x64aeac28dc34b36d), C64e(0x6c34a550b828db71),
C64e(0xf861e2f2108d512a), C64e(0xe3db643359dd75fc),
C64e(0x1cacbcf143ce3fa2), C64e(0x67bbd13c02e843b0),
C64e(0x330a5bca8829a175), C64e(0x7f34194db416535c),
C64e(0x923b94c30e794d1e), C64e(0x797475d7b6eeaf3f),
C64e(0xeaa8d4f7be1a3921), C64e(0x5cf47e094c232751),
C64e(0x26a32453ba323cd2), C64e(0x44a3174a6da6d5ad),
C64e(0xb51d3ea6aff2c908), C64e(0x83593d98916b3c56),
C64e(0x4cf87ca17286604d), C64e(0x46e23ecc086ec7f6),
C64e(0x2f9833b3b1bc765e), C64e(0x2bd666a5efc4e62a),
C64e(0x06f4b6e8bec1d436), C64e(0x74ee8215bcef2163),
C64e(0xfdc14e0df453c969), C64e(0xa77d5ac406585826),
C64e(0x7ec1141606e0fa16), C64e(0x7e90af3d28639d3f),
C64e(0xd2c9f2e3009bd20c), C64e(0x5faace30b7d40c30),
C64e(0x742a5116f2e03298), C64e(0x0deb30d8e3cef89a),
C64e(0x4bc59e7bb5f17992), C64e(0xff51e66e048668d3),
C64e(0x9b234d57e6966731), C64e(0xcce6a6f3170a7505),
C64e(0xb17681d913326cce), C64e(0x3c175284f805a262),
C64e(0xf42bcbb378471547), C64e(0xff46548223936a48),
C64e(0x38df58074e5e6565), C64e(0xf2fc7c89fc86508e),
C64e(0x31702e44d00bca86), C64e(0xf04009a23078474e),
C64e(0x65a0ee39d1f73883), C64e(0xf75ee937e42c3abd),
C64e(0x2197b2260113f86f), C64e(0xa344edd1ef9fdee7),
C64e(0x8ba0df15762592d9), C64e(0x3c85f7f612dc42be),
C64e(0xd8a7ec7cab27b07e), C64e(0x538d7ddaaa3ea8de),
C64e(0xaa25ce93bd0269d8), C64e(0x5af643fd1a7308f9),
C64e(0xc05fefda174a19a5), C64e(0x974d66334cfd216a),
C64e(0x35b49831db411570), C64e(0xea1e0fbbedcd549b),
C64e(0x9ad063a151974072), C64e(0xf6759dbf91476fe2)
};
/*
 * Round-constant accessors: round r owns four consecutive entries of C[],
 * in the order even-hi, even-lo, odd-hi, odd-lo.
 */
#define Ceven_hi(r) (C[((r) << 2) + 0])
#define Ceven_lo(r) (C[((r) << 2) + 1])
#define Codd_hi(r) (C[((r) << 2) + 2])
#define Codd_lo(r) (C[((r) << 2) + 3])
/*
 * S(x0, x1, x2, x3, cb, r): apply Sb to the "h" halves and then to the "l"
 * halves of four state words. x0..x3 are name prefixes (e.g. h0 -> h0h and
 * h0l); cb is the accessor prefix (Ceven_ or Codd_) pasted with hi / lo.
 */
#define S(x0, x1, x2, x3, cb, r) do { \
Sb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, cb ## hi(r)); \
Sb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, cb ## lo(r)); \
} while (0)
/*
 * L(x0..x7): apply Lb to the "h" halves and then to the "l" halves of the
 * eight state words whose name prefixes are given.
 */
#define L(x0, x1, x2, x3, x4, x5, x6, x7) do { \
Lb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, \
x4 ## h, x5 ## h, x6 ## h, x7 ## h); \
Lb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, \
x4 ## l, x5 ## l, x6 ## l, x7 ## l); \
} while (0)
/*
 * Wz(x, c, n): for both halves of state word x (x##h and x##l) compute,
 * in every 64-bit element,
 *
 *     ((v >> n) & c) | ((v & c) << n)
 *
 * i.e. exchange the bits selected by c with the bits n positions above
 * them. c is a __m256i mask, n a shift count.
 *
 * Both halves use the logical-shift intrinsic. The low half previously
 * used the GCC vector-extension operator `>>`, which is an arithmetic
 * shift on the signed elements of __m256i and is not accepted by every
 * compiler; the result was only correct because c masks off the
 * sign-extended bits.
 */
#define Wz(x, c, n) \
do { \
   __m256i t = _mm256_slli_epi64( _mm256_and_si256( x ## h, (c) ), (n) ); \
   x ## h = _mm256_or_si256( _mm256_and_si256( \
                     _mm256_srli_epi64( x ## h, (n) ), (c) ), t ); \
   t = _mm256_slli_epi64( _mm256_and_si256( x ## l, (c) ), (n) ); \
   x ## l = _mm256_or_si256( _mm256_and_si256( \
                     _mm256_srli_epi64( x ## l, (n) ), (c) ), t ); \
} while (0)
/*
#define Wz(x, c, n) do { \
sph_u64 t = (x ## h & (c)) << (n); \
x ## h = ((x ## h >> (n)) & (c)) | t; \
t = (x ## l & (c)) << (n); \
x ## l = ((x ## l >> (n)) & (c)) | t; \
} while (0)
*/
/*
 * W0..W5 apply Wz with group sizes of 1, 2, 4, 8, 16 and 32 bits; the mask
 * passed to Wz selects the lower group of each pair and is the same in all
 * four 64-bit elements. W6 exchanges the two halves of a state word.
 */
#define W0(x)   Wz( x, _mm256_set1_epi64x( 0x5555555555555555 ),  1 )
#define W1(x)   Wz( x, _mm256_set1_epi64x( 0x3333333333333333 ),  2 )
#define W2(x)   Wz( x, _mm256_set1_epi64x( 0x0F0F0F0F0F0F0F0F ),  4 )
#define W3(x)   Wz( x, _mm256_set1_epi64x( 0x00FF00FF00FF00FF ),  8 )
#define W4(x)   Wz( x, _mm256_set1_epi64x( 0x0000FFFF0000FFFF ), 16 )
#define W5(x)   Wz( x, _mm256_set1_epi64x( 0x00000000FFFFFFFF ), 32 )
#define W6(x) \
do { \
   __m256i saved_lo = x ## l; \
   x ## l = x ## h; \
   x ## h = saved_lo; \
} while (0)
/*
#define W0(x) Wz(x, SPH_C64(0x5555555555555555), 1)
#define W1(x) Wz(x, SPH_C64(0x3333333333333333), 2)
#define W2(x) Wz(x, SPH_C64(0x0F0F0F0F0F0F0F0F), 4)
#define W3(x) Wz(x, SPH_C64(0x00FF00FF00FF00FF), 8)
#define W4(x) Wz(x, SPH_C64(0x0000FFFF0000FFFF), 16)
#define W5(x) Wz(x, SPH_C64(0x00000000FFFFFFFF), 32)
#define W6(x) do { \
sph_u64 t = x ## h; \
x ## h = x ## l; \
x ## l = t; \
} while (0)
*/
/*
 * DECL_STATE declares the working copy of the hash state as locals:
 * sixteen __m256i words (h0..h7, each with an "h" and an "l" half) plus
 * the scratch variable `tmp` that Sb requires.
 */
#define DECL_STATE \
__m256i h0h, h1h, h2h, h3h, h4h, h5h, h6h, h7h; \
__m256i h0l, h1l, h2l, h3l, h4l, h5l, h6l, h7l; \
__m256i tmp;
/*
 * READ_STATE(state) loads the locals from state->H[]. Word k is stored
 * as H[2k] (h half) followed by H[2k+1] (l half).
 */
#define READ_STATE(state) do { \
h0h = (state)->H[ 0]; \
h0l = (state)->H[ 1]; \
h1h = (state)->H[ 2]; \
h1l = (state)->H[ 3]; \
h2h = (state)->H[ 4]; \
h2l = (state)->H[ 5]; \
h3h = (state)->H[ 6]; \
h3l = (state)->H[ 7]; \
h4h = (state)->H[ 8]; \
h4l = (state)->H[ 9]; \
h5h = (state)->H[10]; \
h5l = (state)->H[11]; \
h6h = (state)->H[12]; \
h6l = (state)->H[13]; \
h7h = (state)->H[14]; \
h7l = (state)->H[15]; \
} while (0)
/*
 * WRITE_STATE(state) stores the locals back to state->H[] using the same
 * layout as READ_STATE.
 */
#define WRITE_STATE(state) do { \
(state)->H[ 0] = h0h; \
(state)->H[ 1] = h0l; \
(state)->H[ 2] = h1h; \
(state)->H[ 3] = h1l; \
(state)->H[ 4] = h2h; \
(state)->H[ 5] = h2l; \
(state)->H[ 6] = h3h; \
(state)->H[ 7] = h3l; \
(state)->H[ 8] = h4h; \
(state)->H[ 9] = h4l; \
(state)->H[10] = h5h; \
(state)->H[11] = h5l; \
(state)->H[12] = h6h; \
(state)->H[13] = h6l; \
(state)->H[14] = h7h; \
(state)->H[15] = h7l; \
} while (0)
/*
 * INPUT_BUF1: load the eight vectors of the current block from `buf` into
 * m0h..m3l and XOR them into state words h0..h3.
 *
 * Requires `buf` (__m256i*) and the DECL_STATE locals to be in scope.
 * Deliberately NOT wrapped in do { } while (0): the m* variables declared
 * here must stay visible for a later INPUT_BUF2 in the same scope.
 *
 * The final line carries no line-continuation backslash, so the macro ends
 * here regardless of what follows it in the file.
 */
#define INPUT_BUF1 \
   __m256i m0h = buf[0]; \
   __m256i m0l = buf[1]; \
   __m256i m1h = buf[2]; \
   __m256i m1l = buf[3]; \
   __m256i m2h = buf[4]; \
   __m256i m2l = buf[5]; \
   __m256i m3h = buf[6]; \
   __m256i m3l = buf[7]; \
   h0h = _mm256_xor_si256( h0h, m0h ); \
   h0l = _mm256_xor_si256( h0l, m0l ); \
   h1h = _mm256_xor_si256( h1h, m1h ); \
   h1l = _mm256_xor_si256( h1l, m1l ); \
   h2h = _mm256_xor_si256( h2h, m2h ); \
   h2l = _mm256_xor_si256( h2l, m2l ); \
   h3h = _mm256_xor_si256( h3h, m3h ); \
   h3l = _mm256_xor_si256( h3l, m3l );
/*
 * INPUT_BUF2: XOR the block words m0h..m3l (declared by a preceding
 * INPUT_BUF1 in the same scope) into state words h4..h7.
 *
 * The final line carries no line-continuation backslash, so the macro ends
 * here and cannot absorb the declaration that follows it.
 */
#define INPUT_BUF2 \
   h4h = _mm256_xor_si256( h4h, m0h ); \
   h4l = _mm256_xor_si256( h4l, m0l ); \
   h5h = _mm256_xor_si256( h5h, m1h ); \
   h5l = _mm256_xor_si256( h5l, m1l ); \
   h6h = _mm256_xor_si256( h6h, m2h ); \
   h6l = _mm256_xor_si256( h6l, m2l ); \
   h7h = _mm256_xor_si256( h7h, m3h ); \
   h7l = _mm256_xor_si256( h7l, m3l );
static const sph_u64 IV256[] = {
C64e(0xeb98a3412c20d3eb), C64e(0x92cdbe7b9cb245c1),
C64e(0x1c93519160d4c7fa), C64e(0x260082d67e508a03),
C64e(0xa4239e267726b945), C64e(0xe0fb1a48d41a9477),
C64e(0xcdb5ab26026b177a), C64e(0x56f024420fff2fa8),
C64e(0x71a396897f2e4d75), C64e(0x1d144908f77de262),
C64e(0x277695f776248f94), C64e(0x87d5b6574780296c),
C64e(0x5c5e272dac8e0d6c), C64e(0x518450c657057a0f),
C64e(0x7be4d367702412ea), C64e(0x89e3ab13d31cd769)
};
static const sph_u64 IV512[] = {
C64e(0x6fd14b963e00aa17), C64e(0x636a2e057a15d543),
C64e(0x8a225e8d0c97ef0b), C64e(0xe9341259f2b3c361),
C64e(0x891da0c1536f801e), C64e(0x2aa9056bea2b6d80),
C64e(0x588eccdb2075baa6), C64e(0xa90f3a76baf83bf7),
C64e(0x0169e60541e34a69), C64e(0x46b58a8e2e6fe65a),
C64e(0x1047a7d0c1843c24), C64e(0x3b6e71b12d5ac199),
C64e(0xcf57f6ec9db1f856), C64e(0xa706887c5716b156),
C64e(0xe3c2fcdfe68517fb), C64e(0x545a4678cc8cdd4b)
};
#else
#endif
/*
 * SL(ro): one round inside a loop; relies on a variable `r` in scope at
 * the expansion site and forwards round number r + ro to SLu.
 *
 * SLu(r, ro): one full round on the DECL_STATE locals. Applies S with the
 * even constants of round r to h0/h2/h4/h6, S with the odd constants to
 * h1/h3/h5/h7, then L across all eight words, then the permutation
 * W<ro> (ro must be a literal 0..6, it is token-pasted) to the odd words.
 */
#define SL(ro) SLu(r + ro, ro)
#define SLu(r, ro) do { \
S(h0, h2, h4, h6, Ceven_, r); \
S(h1, h3, h5, h7, Codd_, r); \
L(h0, h2, h4, h6, h1, h3, h5, h7); \
W ## ro(h1); \
W ## ro(h3); \
W ## ro(h5); \
W ## ro(h7); \
} while (0)
#if SPH_SMALL_FOOTPRINT_JH
#if SPH_JH_64
/*
* The "small footprint" 64-bit version just uses a partially unrolled
* loop.
*/
/*
 * E8 (small-footprint form): runs rounds 0..41 as six loop iterations of
 * seven rounds each; SL(k) picks up the loop counter `r` declared here.
 */
#define E8 do { \
unsigned r; \
for (r = 0; r < 42; r += 7) { \
SL(0); \
SL(1); \
SL(2); \
SL(3); \
SL(4); \
SL(5); \
SL(6); \
} \
} while (0)
#else
#endif
#else
#if SPH_JH_64
/*
* On a "true 64-bit" architecture, we can unroll at will.
*/
/*
 * E8 (fully unrolled form): rounds 0..41 written out explicitly. The
 * second SLu argument selects the W permutation and cycles 0..6, i.e. it
 * equals the round number modulo 7.
 */
#define E8 do { \
SLu( 0, 0); \
SLu( 1, 1); \
SLu( 2, 2); \
SLu( 3, 3); \
SLu( 4, 4); \
SLu( 5, 5); \
SLu( 6, 6); \
SLu( 7, 0); \
SLu( 8, 1); \
SLu( 9, 2); \
SLu(10, 3); \
SLu(11, 4); \
SLu(12, 5); \
SLu(13, 6); \
SLu(14, 0); \
SLu(15, 1); \
SLu(16, 2); \
SLu(17, 3); \
SLu(18, 4); \
SLu(19, 5); \
SLu(20, 6); \
SLu(21, 0); \
SLu(22, 1); \
SLu(23, 2); \
SLu(24, 3); \
SLu(25, 4); \
SLu(26, 5); \
SLu(27, 6); \
SLu(28, 0); \
SLu(29, 1); \
SLu(30, 2); \
SLu(31, 3); \
SLu(32, 4); \
SLu(33, 5); \
SLu(34, 6); \
SLu(35, 0); \
SLu(36, 1); \
SLu(37, 2); \
SLu(38, 3); \
SLu(39, 4); \
SLu(40, 5); \
SLu(41, 6); \
} while (0)
#else
#endif
#endif
/*
 * Reset a 4-way JH context.
 *
 * sc  context to initialise.
 * iv  array of 16 64-bit initial-value words; each word is replicated
 *     into all four 64-bit elements of the matching H[] vector.
 *
 * The buffer fill level and the block counter are cleared.
 */
static void
jh_4way_init( jh_4way_context *sc, const void *iv )
{
   uint64_t *iv_words = (uint64_t*)iv;
   int idx = 0;

   while ( idx < 16 )
   {
      const uint64_t w = iv_words[idx];
      sc->H[idx] = _mm256_set_epi64x( w, w, w, w );
      idx++;
   }
   sc->block_count = 0;
   sc->ptr = 0;
}
/*
 * Absorb input into a 4-way JH context.
 *
 * sc    context being updated.
 * data  interleaved input, read as an array of __m256i (one vector per
 *       64-bit word position).
 * len   input length in bytes PER LANE. Offsets are converted to vector
 *       indices with >>3, so len and sc->ptr are presumably expected to
 *       be multiples of 8 -- TODO confirm with callers.
 *
 * Data is staged in sc->buf; every time 64 bytes per lane are available
 * the block is mixed into the state (INPUT_BUF1, E8, INPUT_BUF2) and
 * block_count is incremented.
 */
static void
jh_4way_core( jh_4way_context *sc, const void *data, size_t len )
{
__m256i *buf;
__m256i *vdata = (__m256i*)data;
const int buf_size = 64; // block size in bytes per lane (sc->buf holds 8 __m256i)
size_t ptr;
DECL_STATE
// NOTE: the local must be called `buf`; INPUT_BUF1 refers to it by name.
buf = sc->buf;
ptr = sc->ptr;
// Fast path: input does not complete a block, just stage it and return.
if ( len < (buf_size - ptr) )
{
memcpy_m256i( buf + (ptr>>3), vdata, len>>3 );
ptr += len;
sc->ptr = ptr;
return;
}
READ_STATE(sc);
while ( len > 0 )
{
size_t clen;
// Copy at most what is needed to fill the staging buffer.
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_m256i( buf + (ptr>>3), vdata, clen>>3 );
ptr += clen;
vdata += (clen>>3);
len -= clen;
if ( ptr == buf_size )
{
// Full block: XOR into h0..h3, run the 42 rounds, XOR into h4..h7.
INPUT_BUF1;
E8;
INPUT_BUF2;
sc->block_count ++;
ptr = 0;
}
}
WRITE_STATE(sc);
sc->ptr = ptr;
}
/*
 * Finish a 4-way JH computation: append the padding (a 0x80 word, zero
 * words, then the 128-bit message bit length as two big-endian 64-bit
 * words), absorb it, and store the result.
 *
 * sc   context to finalise.
 * dst  receives eight __m256i vectors copied from sc->H[8..15].
 *
 * ub, n, out_size_w32 and iv are accepted but not used.
 * NOTE(review): because out_size_w32 is ignored, the 256-bit variant also
 * stores all eight vectors rather than only the last four -- confirm that
 * callers expect this layout.
 */
static void
jh_4way_close( jh_4way_context *sc, unsigned ub, unsigned n, void *dst,
               size_t out_size_w32, const void *iv )
{
   __m256i pad[16*4];
   sph_u64 bits_lo, bits_hi, bits_lo_be, bits_hi_be;
   /* bytes per lane from the 0x80 marker up to the length field */
   const size_t numz    = ( sc->ptr == 0 ) ? 48 : 112 - sc->ptr;
   const size_t len_pos = numz >> 3;

   pad[0] = _mm256_set_epi64x( 0x80, 0x80, 0x80, 0x80 );
   memset_zero_m256i( pad + 1, len_pos - 1 );

   bits_lo = SPH_T64(sc->block_count << 9) + (sc->ptr << 3);
   bits_hi = SPH_T64(sc->block_count >> 55);
   sph_enc64be( &bits_lo_be, bits_lo );
   sph_enc64be( &bits_hi_be, bits_hi );
   pad[len_pos]     = _mm256_set_epi64x( bits_hi_be, bits_hi_be,
                                         bits_hi_be, bits_hi_be );
   pad[len_pos + 1] = _mm256_set_epi64x( bits_lo_be, bits_lo_be,
                                         bits_lo_be, bits_lo_be );

   jh_4way_core( sc, pad, numz + 16 );

   /* upper half of the state is the output */
   memcpy_m256i( (__m256i*)dst, sc->H + 8, 8 );
}
void
jh256_4way_init(void *cc)
{
jh_4way_init(cc, IV256);
}
void
jh256_4way(void *cc, const void *data, size_t len)
{
jh_4way_core(cc, data, len);
}
void
jh256_4way_close(void *cc, void *dst)
{
jh_4way_close(cc, 0, 0, dst, 8, IV256);
}
void
jh512_4way_init(void *cc)
{
jh_4way_init(cc, IV512);
}
void
jh512_4way(void *cc, const void *data, size_t len)
{
jh_4way_core(cc, data, len);
}
void
jh512_4way_close(void *cc, void *dst)
{
jh_4way_close(cc, 0, 0, dst, 16, IV512);
}
#ifdef __cplusplus
}
#endif
#endif

100
algo/jh/jh-hash-4way.h Normal file
View File

@@ -0,0 +1,100 @@
/* $Id: sph_jh.h 216 2010-06-08 09:46:57Z tp $ */
/**
* JH interface. JH is a family of functions which differ by
* their output size; this implementation defines JH for output
* sizes 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_jh.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef JH_HASH_4WAY_H__
#define JH_HASH_4WAY_H__
#ifdef __AVX2__
#ifdef __cplusplus
extern "C"{
#endif
#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
#define SPH_SIZE_jh256 256
#define SPH_SIZE_jh512 512
/**
* This structure is a context for JH computations: it contains the
* intermediate values and some data from the last entered block. Once
* a JH computation has been performed, the context can be reused for
* another computation.
*
* The contents of this structure are private. A running JH computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
/*
 * 4-way JH context. Every member that was a 64-bit word in the scalar
 * context (kept in the comment below for reference) is an __m256i here.
 */
typedef struct {
// Staging buffer for one input block: 8 vectors = 64 bytes per lane.
__m256i buf[8] __attribute__ ((aligned (64)));
// Hash state, 16 vectors; word k occupies H[2k] and H[2k+1].
__m256i H[16];
// Fill level of buf in bytes per lane (0..63 between calls).
size_t ptr;
// Number of complete blocks absorbed so far.
uint64_t block_count;
/*
unsigned char buf[64];
size_t ptr;
union {
sph_u64 wide[16];
} H;
sph_u64 block_count;
*/
} jh_4way_context;
typedef jh_4way_context jh256_4way_context;
typedef jh_4way_context jh512_4way_context;
void jh256_4way_init(void *cc);
void jh256_4way(void *cc, const void *data, size_t len);
void jh256_4way_close(void *cc, void *dst);
void jh512_4way_init(void *cc);
void jh512_4way(void *cc, const void *data, size_t len);
void jh512_4way_close(void *cc, void *dst);
#ifdef __cplusplus
}
#endif
#endif
#endif

228
algo/jh/jha-4way.c Normal file
View File

@@ -0,0 +1,228 @@
#if defined(JHA_4WAY)
#include "jha-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "avxdefs.h"
#include "algo/blake/blake-hash-4way.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
//static __thread keccak512_4way_context jha_kec_mid
// __attribute__ ((aligned (64)));
// 4-way JHA hash.
//
// input   four 80 byte block headers interleaved 4x64 (as produced by
//         m256_interleave_4x64 in scanhash_jha_4way).
// output  four 32 byte hashes stored back to back (lane 0 first), 128
//         bytes in total.
//
// Mirrors the scalar jha_hash: keccak512, then three rounds of
// (groestl | skein) followed by (blake | jh), where for each lane the
// first algorithm of a pair is used when bit 0 of that lane's current hash
// is set and the second one when it is clear. Both candidates are computed
// for all lanes and the per-lane result is selected with a mask.
//
// Unlike the scalar version no keccak midstate is precomputed; the full 80
// bytes are hashed on every call.
void jha_hash_4way( void *output, const void *input )
{
   uint64_t hash0[8] __attribute__ ((aligned (64)));
   uint64_t hash1[8] __attribute__ ((aligned (64)));
   uint64_t hash2[8] __attribute__ ((aligned (64)));
   uint64_t hash3[8] __attribute__ ((aligned (64)));
   uint64_t vhash[8*4] __attribute__ ((aligned (64)));
   uint64_t vhasha[8*4] __attribute__ ((aligned (64)));
   uint64_t vhashb[8*4] __attribute__ ((aligned (64)));
   __m256i  mask;
   __m256i* vh256  = (__m256i*)vhash;
   __m256i* vha256 = (__m256i*)vhasha;
   __m256i* vhb256 = (__m256i*)vhashb;
   uint8_t* out    = (uint8_t*)output;

   blake512_4way_context  ctx_blake;
   hashState_groestl      ctx_groestl;
   jh512_4way_context     ctx_jh;
   skein512_4way_context  ctx_skein;
   keccak512_4way_context ctx_keccak;

   keccak512_4way_init( &ctx_keccak );
   keccak512_4way( &ctx_keccak, input, 80 );
   keccak512_4way_close( &ctx_keccak, vhash );

   // Heavy & Light Pair Loop
   for ( int round = 0; round < 3; round++ )
   {
      // Each candidate buffer holds 8 vectors (8 x 64 bit words x 4 lanes).
      memset_zero_m256i( vha256, 8 );
      memset_zero_m256i( vhb256, 8 );

      // Per lane: (bit0 - 1) is all ones when bit 0 is clear, zero when
      // it is set. All ones selects the second candidate (vhashb).
      mask = _mm256_sub_epi64( _mm256_and_si256( vh256[0],
                      mm256_vec_epi64( 0x1 ) ), mm256_vec_epi64( 0x1 ) );

      // groestl (serial) v skein
      m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
      init_groestl( &ctx_groestl, 64 );
      update_and_final_groestl( &ctx_groestl, (char*)hash0,
                                (char*)hash0, 512 );
      init_groestl( &ctx_groestl, 64 );
      update_and_final_groestl( &ctx_groestl, (char*)hash1,
                                (char*)hash1, 512 );
      init_groestl( &ctx_groestl, 64 );
      update_and_final_groestl( &ctx_groestl, (char*)hash2,
                                (char*)hash2, 512 );
      init_groestl( &ctx_groestl, 64 );
      update_and_final_groestl( &ctx_groestl, (char*)hash3,
                                (char*)hash3, 512 );
      m256_interleave_4x64( vhasha, hash0, hash1, hash2, hash3, 512 );

      skein512_4way_init( &ctx_skein );
      skein512_4way( &ctx_skein, vhash, 64 );
      skein512_4way_close( &ctx_skein, vhashb );

      // merge: groestl result where bit 0 was set, skein result otherwise
      for ( int i = 0; i < 8; i++ )
         vh256[i] = _mm256_blendv_epi8( vha256[i], vhb256[i], mask );

      // The second choice depends on the hash produced by the first
      // stage, exactly as the scalar code re-tests hash[0] & 1.
      mask = _mm256_sub_epi64( _mm256_and_si256( vh256[0],
                      mm256_vec_epi64( 0x1 ) ), mm256_vec_epi64( 0x1 ) );

      // blake v jh
      blake512_4way_init( &ctx_blake );
      blake512_4way( &ctx_blake, vhash, 64 );
      blake512_4way_close( &ctx_blake, vhasha );

      jh512_4way_init( &ctx_jh );
      jh512_4way( &ctx_jh, vhash, 64 );
      jh512_4way_close( &ctx_jh, vhashb );

      // merge: blake result where bit 0 was set, jh result otherwise
      for ( int i = 0; i < 8; i++ )
         vh256[i] = _mm256_blendv_epi8( vha256[i], vhb256[i], mask );
   }

   m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );

   // Only the first 256 bits of each lane's hash are returned.
   memcpy( out,      hash0, 32 );
   memcpy( out + 32, hash1, 32 );
   memcpy( out + 64, hash2, 32 );
   memcpy( out + 96, hash3, 32 );
}
// Scan nonces four at a time with jha_hash_4way.
//
// thr_id       index into work_restart[] for this thread.
// work         work unit; data[19] is the starting nonce. On return
//              nfound[lane] / nonces[lane] report the lanes that met the
//              target and data[19] holds the first nonce of the last
//              batch that was hashed.
// max_nonce    scanning stops once the batch base nonce reaches this.
// hashes_done  receives the number of nonces hashed by this call.
//
// Returns the number of lanes that produced a valid share in the last
// batch (0 when the range was exhausted or a restart was requested).
int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
                       uint64_t *hashes_done )
{
   uint32_t hash[4*8] __attribute__ ((aligned (64)));
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
   uint32_t endiandata[20] __attribute__((aligned(64)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   const uint32_t Htarg = ptarget[7];
   uint32_t n = first_nonce;
   uint32_t *nonces = work->nonces;
   bool *found = work->nfound;
   int num_found = 0;
   // The nonce is 32 bit word 19, i.e. the upper half of 64 bit word 9.
   // In the 4x64 interleaved layout that is 32 bit index (9*4 + lane)*2 + 1.
   uint32_t *noncep[4] = { vdata + 73, vdata + 75, vdata + 77, vdata + 79 };

   static const uint64_t htmax[] = {
          0,
          0xF,
          0xFF,
          0xFFF,
          0xFFFF,
          0x10000000
   };
   static const uint32_t masks[] = {
          0xFFFFFFFF,
          0xFFFFFFF0,
          0xFFFFFF00,
          0xFFFFF000,
          0xFFFF0000,
          0
   };

   // we need bigendian data...
   for ( int i = 0; i < 19; i++ )
      be32enc( &endiandata[i], pdata[i] );
   // Word 19 is the nonce and is filled in per lane below; give it a
   // defined value so the interleave never reads an indeterminate word.
   endiandata[19] = 0;

   uint64_t *edata = (uint64_t*)endiandata;
   m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );

   for ( int m = 0; m < 6; m++ )
   {
      if ( Htarg <= htmax[m] )
      {
         const uint32_t mask = masks[m];
         do {
            for ( int lane = 0; lane < 4; lane++ )
               be32enc( noncep[lane], n + lane );

            jha_hash_4way( hash, vdata );
            pdata[19] = n;

            for ( int lane = 0; lane < 4; lane++ )
            {
               uint32_t *lane_hash = hash + 8*lane;

               found[lane] = false;
               // cheap pre-filter on the top word before the full compare
               if ( !( lane_hash[7] & mask )
                    && fulltest( lane_hash, ptarget ) )
               {
                  found[lane] = true;
                  num_found++;
                  nonces[lane] = n + lane;
                  work_set_target_ratio( work, lane_hash );
               }
            }
            n += 4;
         } while ( ( num_found == 0 ) && ( n < max_nonce )
                   && !work_restart[thr_id].restart );
         break;
      }
   }

   // n has already been advanced past every nonce that was hashed.
   *hashes_done = n - first_nonce;
   return num_found;
}
#endif

18
algo/jh/jha-gate.c Normal file
View File

@@ -0,0 +1,18 @@
#include "jha-gate.h"
// Install the JHA handlers into the algo gate.
//
// Only the scalar implementation (scanhash_jha / jha_hash) is registered.
// The 4-way AVX2 variant exists in jha-4way.c but is intentionally not
// wired up here; jha-gate.h keeps its prototypes commented out as well.
//
// Always returns true.
bool register_jha_algo( algo_gate_t* gate )
{
   gate->optimizations = SSE2_OPT | AES_OPT;
   gate->scanhash      = (void*)&scanhash_jha;
   gate->hash          = (void*)&jha_hash;
   gate->set_target    = (void*)&scrypt_set_target;
   return true;
}

27
algo/jh/jha-gate.h Normal file
View File

@@ -0,0 +1,27 @@
#ifndef JHA_GATE_H__
#define JHA_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
// JHA_4WAY is defined when the build requests 4-way hashing (FOUR_WAY),
// targets AVX2 and has AES-NI available. jha-4way.c is compiled only when
// it is defined.
#if defined(FOUR_WAY) && defined(__AVX2__) && !defined(NO_AES_NI)
#define JHA_4WAY
#endif
// The 4-way prototypes are currently disabled (commented out), so only the
// scalar entry points below are declared regardless of JHA_4WAY.
//#if defined JHA_4WAY
//void jha_hash_4way( void *state, const void *input );
//int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
// uint64_t *hashes_done );
//#else
// Scalar JHA hash: writes 32 bytes to state.
void jha_hash( void *state, const void *input );
// Scalar nonce scanner built on jha_hash.
int scanhash_jha( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
//#endif
#endif

155
algo/jh/jha.c Normal file
View File

@@ -0,0 +1,155 @@
#include "jha-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/blake/sph_blake.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#ifdef NO_AES_NI
#include "algo/groestl/sph_groestl.h"
#else
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
static __thread sph_keccak512_context jha_kec_mid __attribute__ ((aligned (64)));
void jha_kec_midstate( const void* input )
{
sph_keccak512_init( &jha_kec_mid );
sph_keccak512( &jha_kec_mid, input, 64 );
}
// Scalar JHA hash.
//
// output  receives the first 32 bytes of the final 64 byte hash.
// input   80 byte block header. Only bytes 64..79 are read here: the
//         keccak state for bytes 0..63 is taken from the thread-local
//         jha_kec_mid, so jha_kec_midstate() must have been called on this
//         thread for the same header beforehand.
//
// After keccak512, three rounds are run; in each round bit 0 of hash[0]
// selects groestl (set) or skein (clear), and then, re-tested on the new
// hash, blake (set) or jh (clear).
void jha_hash(void *output, const void *input)
{
   uint8_t _ALIGN(128) hash[64];
   // byte pointer for the unhashed tail; arithmetic on void* is not ISO C
   const uint8_t *tail = (const uint8_t*)input + 64;

#ifdef NO_AES_NI
   sph_groestl512_context ctx_groestl;
#else
   hashState_groestl      ctx_groestl;
#endif
   sph_blake512_context   ctx_blake;
   sph_jh512_context      ctx_jh;
   sph_keccak512_context  ctx_keccak;
   sph_skein512_context   ctx_skein;

   // resume from the precomputed midstate
   memcpy( &ctx_keccak, &jha_kec_mid, sizeof jha_kec_mid );
   sph_keccak512( &ctx_keccak, tail, 16 );
   sph_keccak512_close( &ctx_keccak, hash );

   // Heavy & Light Pair Loop
   for ( int round = 0; round < 3; round++ )
   {
      if ( hash[0] & 0x01 )
      {
#ifdef NO_AES_NI
         sph_groestl512_init( &ctx_groestl );
         sph_groestl512( &ctx_groestl, hash, 64 );
         sph_groestl512_close( &ctx_groestl, hash );
#else
         init_groestl( &ctx_groestl, 64 );
         update_and_final_groestl( &ctx_groestl, (char*)hash,
                                   (char*)hash, 512 );
#endif
      }
      else
      {
         sph_skein512_init( &ctx_skein );
         sph_skein512( &ctx_skein, hash, 64 );
         sph_skein512_close( &ctx_skein, hash );
      }

      if ( hash[0] & 0x01 )
      {
         sph_blake512_init( &ctx_blake );
         sph_blake512( &ctx_blake, hash, 64 );
         sph_blake512_close( &ctx_blake, hash );
      }
      else
      {
         sph_jh512_init( &ctx_jh );
         sph_jh512( &ctx_jh, hash, 64 );
         sph_jh512_close( &ctx_jh, hash );
      }
   }

   memcpy( output, hash, 32 );
}
/*
 * Scan nonces for a JHA hash that meets the work target.
 *
 * thr_id       index into work_restart[] used to detect a restart request
 * work         work->data holds the 20-word header (word 19 is the nonce),
 *              work->target the 8-word target
 * max_nonce    scanning stops once the nonce reaches this value
 * hashes_done  receives the number of nonces tried
 *
 * Returns 1 when a nonce passes fulltest() (pdata[19] then holds it),
 * otherwise 0 with pdata[19] set to the last nonce tried.
 *
 * A cheap pre-filter mask on hash32[7] is chosen from the high target word.
 * Targets above the largest threshold fall back to mask 0, i.e. every hash
 * goes to fulltest(); previously such targets matched no table entry and the
 * function returned without hashing anything.
 */
int scanhash_jha(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
{
    uint32_t _ALIGN(128) hash32[8];
    uint32_t _ALIGN(128) endiandata[20];
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    const uint32_t first_nonce = pdata[19];
    const uint32_t Htarg = ptarget[7];
    uint32_t n = first_nonce - 1;

    static const uint32_t htmax[] = {
        0,
        0xF,
        0xFF,
        0xFFF,
        0xFFFF,
        0x10000000
    };
    static const uint32_t masks[] = {
        0xFFFFFFFF,
        0xFFFFFFF0,
        0xFFFFFF00,
        0xFFFFF000,
        0xFFFF0000,
        0
    };
    const int num_masks = (int)( sizeof masks / sizeof masks[0] );

    /* First threshold that covers Htarg wins; mask 0 is the catch-all. */
    uint32_t mask = 0;
    for ( int m = 0; m < num_masks; m++ )
    {
        if ( Htarg <= htmax[m] )
        {
            mask = masks[m];
            break;
        }
    }

    // we need bigendian data...
    for ( int i = 0; i < 19; i++ )
        be32enc( &endiandata[i], pdata[i] );

    /* The first 64 bytes never change while scanning: hash them once. */
    jha_kec_midstate( endiandata );

#ifdef DEBUG_ALGO
    printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif

    do {
        pdata[19] = ++n;
        be32enc( &endiandata[19], n );
        jha_hash( hash32, endiandata );
#ifndef DEBUG_ALGO
        if ( ( !( hash32[7] & mask ) ) && fulltest( hash32, ptarget ) )
        {
            work_set_target_ratio( work, hash32 );
            *hashes_done = n - first_nonce + 1;
            return 1;
        }
#else
        if ( !( n % 0x1000 ) && !thr_id ) printf(".");
        if ( !( hash32[7] & mask ) )
        {
            printf("[%d]",thr_id);
            if ( fulltest( hash32, ptarget ) )
            {
                work_set_target_ratio( work, hash32 );
                *hashes_done = n - first_nonce + 1;
                return 1;
            }
        }
#endif
    } while ( n < max_nonce && !work_restart[thr_id].restart );

    *hashes_done = n - first_nonce + 1;
    pdata[19] = n;
    return 0;
}

View File

@@ -914,6 +914,7 @@ jh_core(sph_jh_context *sc, const void *data, size_t len)
buf = sc->buf;
ptr = sc->ptr;
if (len < (sizeof sc->buf) - ptr) {
memcpy(buf + ptr, data, len);
ptr += len;

View File

@@ -22,15 +22,12 @@
*/
#include <emmintrin.h>
#include <stdint.h>
#include <string.h>
#include "algo/sha/sha3-defs.h"
typedef __m128i word128; /*word128 defines a 128-bit SSE2 word*/
typedef unsigned char BitSequence;
typedef unsigned long long DataLength;
typedef enum {jhSUCCESS = 0, jhFAIL = 1, jhBAD_HASHLEN = 2} jhReturn;
/*define data alignment for different C compilers*/

104
algo/keccak/keccak-4way.c Normal file
View File

@@ -0,0 +1,104 @@
#include "keccak-gate.h"
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "sph_keccak.h"
#include "keccak-hash-4way.h"
#ifdef KECCAK_4WAY
/*
 * Keccak-256 over four inputs at once.
 *
 * input  4x64-bit interleaved data, 80 bytes per lane (the layout built with
 *        m256_interleave_4x64x in scanhash_keccak_4way)
 * state  receives four consecutive 32-byte digests, lane 0 first (128 bytes)
 */
void keccakhash_4way(void *state, const void *input)
{
    uint64_t lane_hash[4][8] __attribute__ ((aligned (64)));
    uint64_t vhash[8*4] __attribute__ ((aligned (64)));
    keccak256_4way_context ctx;
    uint8_t *out = state;

    keccak256_4way_init( &ctx );
    keccak256_4way( &ctx, input, 80 );
    keccak256_4way_close( &ctx, vhash );

    /* Split the interleaved result back into one buffer per lane. */
    m256_deinterleave_4x64x( lane_hash[0], lane_hash[1], lane_hash[2],
                             lane_hash[3], vhash, 512 );

    for ( int lane = 0; lane < 4; lane++ )
        memcpy( out + 32 * lane, lane_hash[lane], 32 );
}
/*
 * Scan nonces four at a time for Keccak-256 hashes that meet the work target.
 *
 * thr_id       index into work_restart[] used to detect a restart request
 * work         work->data holds the 20-word header (word 19 is the nonce),
 *              work->target the target; work->nonces[] and work->nfound[]
 *              receive the per-lane results
 * max_nonce    upper bound for the scan
 * hashes_done  receives the number of nonces tried
 *
 * Returns the number of lanes whose hash passed fulltest() in the final
 * batch. pdata[19] is updated only when lane 0 finds a solution.
 */
int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done)
{
    uint32_t hash[4*8] __attribute__ ((aligned (64)));
    uint32_t vdata[24*4] __attribute__ ((aligned (64)));
    uint32_t endiandata[20];
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    uint32_t *nonces = work->nonces;
    bool *found = work->nfound;
    const uint32_t first_nonce = pdata[19];
    uint32_t n = first_nonce;
    int num_found = 0;

    for ( int i = 0; i < 19; i++ )
        be32enc( &endiandata[i], pdata[i] );
    /* Placeholder only: each lane's nonce word is rewritten in the loop, but
     * the interleave below reads all 20 words, so do not leave it
     * uninitialised. */
    endiandata[19] = 0;

    /* Replicate the same 80-byte header into all four lanes. */
    uint64_t *edata = (uint64_t*)endiandata;
    m256_interleave_4x64x( (uint64_t*)vdata, edata, edata, edata, edata, 640 );

    do {
        for ( int lane = 0; lane < 4; lane++ )
        {
            found[lane] = false;
            /* Nonce word of each lane in the interleaved layout:
             * 9*8 + 1 for lane 0, then every second 32-bit word. */
            be32enc( vdata + 73 + 2 * lane, n + lane );
        }

        keccakhash_4way( hash, vdata );

        for ( int lane = 0; lane < 4; lane++ )
        {
            uint32_t *lane_hash = hash + 8 * lane;

            if ( ( ( lane_hash[7] & 0xFFFFFF00 ) == 0 )
                 && fulltest( lane_hash, ptarget ) )
            {
                found[lane] = true;
                num_found++;
                nonces[lane] = n + lane;
                if ( lane == 0 )
                    pdata[19] = n;
            }
        }
        n += 4;
    } while ( (num_found == 0) && (n < max_nonce-4)
              && !work_restart[thr_id].restart );

    /* n already points past the last nonce tried, so the difference is the
     * exact count; adding one would report a hash that was never computed. */
    *hashes_done = n - first_nonce;
    return num_found;
}
#endif

27
algo/keccak/keccak-gate.c Normal file
View File

@@ -0,0 +1,27 @@
#include "keccak-gate.h"
// Sets the target of `work` through work_set_target(), passing the job
// difficulty divided by 128 * opt_diff_factor.
void keccak_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (128.0 * opt_diff_factor) );
}
/* Constant installed as the gate's get_max64 callback for keccak. */
int64_t keccak_get_max64()
{
    const int64_t max64 = 0x7ffffLL;

    return max64;
}
/*
 * Install the keccak callbacks in `gate`. The 4-way scan/hash pair is used
 * when KECCAK_4WAY is defined, the scalar pair otherwise; the merkle-root,
 * target and max64 callbacks are the same in both builds. Always returns true.
 */
bool register_keccak_algo( algo_gate_t* gate )
{
#if defined (KECCAK_4WAY)
    gate->optimizations = SSE2_OPT | AVX2_OPT;
    gate->scanhash      = (void*)&scanhash_keccak_4way;
    gate->hash          = (void*)&keccakhash_4way;
#else
    gate->optimizations = SSE2_OPT;
    gate->scanhash      = (void*)&scanhash_keccak;
    gate->hash          = (void*)&keccakhash;
#endif

    gate->get_max64       = (void*)&keccak_get_max64;
    gate->set_target      = (void*)&keccak_set_target;
    gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;

    return true;
}

23
algo/keccak/keccak-gate.h Normal file
View File

@@ -0,0 +1,23 @@
/*
 * Gate header for the keccak algorithm: selects the build variant and
 * declares the hash and scan entry points.
 *
 * The include guard avoids a leading double underscore, which is reserved
 * for the implementation.
 */
#ifndef KECCAK_GATE_H__
#define KECCAK_GATE_H__

#include "algo-gate-api.h"
#include <stdint.h>

/* The 4-way variant is built only when FOUR_WAY and __AVX2__ are defined. */
#if defined(FOUR_WAY) && defined(__AVX2__)
  #define KECCAK_4WAY
#endif

#if defined(KECCAK_4WAY)

/* Hash four interleaved inputs; the digests are written to state. */
void keccakhash_4way( void *state, const void *input );

/* Scan nonces four at a time; returns the number of solutions found. */
int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done );

#endif

/* Scalar entry points, declared in every build. */
void keccakhash( void *state, const void *input );
int scanhash_keccak( int thr_id, struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done );

#endif

View File

@@ -0,0 +1,507 @@
#include <stddef.h>
#include "keccak-hash-4way.h"
#if defined(__AVX2__)
/*
 * Round constants. KECCAK_F_1600 broadcasts RC[j] to all four 64-bit
 * elements of a vector and the IOTA step XORs it into a00, one constant per
 * round (24 entries, 24 rounds).
 */
static const sph_u64 RC[] = {
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001),
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009),
SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088),
SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A),
SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B),
SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003),
SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080),
SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A),
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
};
/*
 * Named accessors for the 25 state vectors. They expand to members of the
 * context pointer `kc`, which must be in scope wherever they are used.
 * aXY is kc->w[X + 5*Y].
 */
#define a00 (kc->w[ 0])
#define a10 (kc->w[ 1])
#define a20 (kc->w[ 2])
#define a30 (kc->w[ 3])
#define a40 (kc->w[ 4])
#define a01 (kc->w[ 5])
#define a11 (kc->w[ 6])
#define a21 (kc->w[ 7])
#define a31 (kc->w[ 8])
#define a41 (kc->w[ 9])
#define a02 (kc->w[10])
#define a12 (kc->w[11])
#define a22 (kc->w[12])
#define a32 (kc->w[13])
#define a42 (kc->w[14])
#define a03 (kc->w[15])
#define a13 (kc->w[16])
#define a23 (kc->w[17])
#define a33 (kc->w[18])
#define a43 (kc->w[19])
#define a04 (kc->w[20])
#define a14 (kc->w[21])
#define a24 (kc->w[22])
#define a34 (kc->w[23])
#define a44 (kc->w[24])
/*
 * The round macros work directly on the context through the aXY accessors,
 * so no local copy of the state is declared, loaded or stored: these three
 * hooks expand to nothing.
 */
#define DECL_STATE
#define READ_STATE(sc)
#define WRITE_STATE(sc)

/*
 * XOR the first size/8 buffered vectors into the state (size is a byte count
 * per lane). Uses the names `kc` and `buf` from the expansion site. The
 * argument is parenthesised so that an expression such as `a + b` is shifted
 * as a whole.
 */
#define INPUT_BUF(size) do { \
    size_t j; \
    for ( j = 0; j < ( (size) >> 3 ); j++ ) \
        kc->w[j] = _mm256_xor_si256( kc->w[j], buf[j] ); \
} while (0)
/* Vector with every bit set in each of its four 64-bit elements. */
#define mm256_neg1 \
(_mm256_set_epi64x( 0xffffffffffffffff, 0xffffffffffffffff, \
0xffffffffffffffff, 0xffffffffffffffff ) )
/*
 * Primitive operations used by the round macros below. Operands are __m256i
 * values; XOR/AND/OR/NOT are bitwise over the whole vector, NOT being an XOR
 * with the all-ones vector. ROL64 defers to mm256_rotl_64, which is defined
 * outside this file. The arguments are pasted unparenthesised, so they must
 * be simple lvalues/expressions.
 */
#define DECL64(x) __m256i x
#define MOV64(d, s) (d = s)
#define XOR64(d, a, b) (d = _mm256_xor_si256(a,b))
#define AND64(d, a, b) (d = _mm256_and_si256(a,b))
#define OR64(d, a, b) (d = _mm256_or_si256(a,b))
#define NOT64(d, s) (d = _mm256_xor_si256(s,mm256_neg1))
#define ROL64(d, v, n) (d = mm256_rotl_64(v, n))
/* The IOTA step uses the ordinary XOR. */
#define XOR64_IOTA XOR64
/*
 * t = rotl64( d0 ^ d1 ^ d2 ^ d3 ^ d4, 1 ) ^ ( c0 ^ c1 ^ c2 ^ c3 ^ c4 )
 */
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
DECL64(tt0); \
DECL64(tt1); \
DECL64(tt2); \
DECL64(tt3); \
XOR64(tt0, d0, d1); \
XOR64(tt1, d2, d3); \
XOR64(tt0, tt0, d4); \
XOR64(tt0, tt0, tt1); \
ROL64(tt0, tt0, 1); \
XOR64(tt2, c0, c1); \
XOR64(tt3, c2, c3); \
XOR64(tt0, tt0, c4); \
XOR64(tt2, tt2, tt3); \
XOR64(t, tt0, tt2); \
} while (0)
/*
 * Theta step. The 25 arguments are five groups of five lanes (b0*, b1*, ...,
 * b4*). All five t values are computed with TH_ELT from the unmodified lanes
 * first; then t0 is XORed into every lane of group 0, t1 into group 1, and
 * so on.
 */
#define THETA(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
b40, b41, b42, b43, b44) \
do { \
DECL64(t0); \
DECL64(t1); \
DECL64(t2); \
DECL64(t3); \
DECL64(t4); \
TH_ELT(t0, b40, b41, b42, b43, b44, b10, b11, b12, b13, b14); \
TH_ELT(t1, b00, b01, b02, b03, b04, b20, b21, b22, b23, b24); \
TH_ELT(t2, b10, b11, b12, b13, b14, b30, b31, b32, b33, b34); \
TH_ELT(t3, b20, b21, b22, b23, b24, b40, b41, b42, b43, b44); \
TH_ELT(t4, b30, b31, b32, b33, b34, b00, b01, b02, b03, b04); \
XOR64(b00, b00, t0); \
XOR64(b01, b01, t0); \
XOR64(b02, b02, t0); \
XOR64(b03, b03, t0); \
XOR64(b04, b04, t0); \
XOR64(b10, b10, t1); \
XOR64(b11, b11, t1); \
XOR64(b12, b12, t1); \
XOR64(b13, b13, t1); \
XOR64(b14, b14, t1); \
XOR64(b20, b20, t2); \
XOR64(b21, b21, t2); \
XOR64(b22, b22, t2); \
XOR64(b23, b23, t2); \
XOR64(b24, b24, t2); \
XOR64(b30, b30, t3); \
XOR64(b31, b31, t3); \
XOR64(b32, b32, t3); \
XOR64(b33, b33, t3); \
XOR64(b34, b34, t3); \
XOR64(b40, b40, t4); \
XOR64(b41, b41, t4); \
XOR64(b42, b42, t4); \
XOR64(b43, b43, t4); \
XOR64(b44, b44, t4); \
} while (0)
/*
 * Rho step: rotate each lane left in place by its fixed offset. The first
 * lane (b00) has offset 0 and is left untouched.
 */
#define RHO(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
b40, b41, b42, b43, b44) \
do { \
/* ROL64(b00, b00, 0); */ \
ROL64(b01, b01, 36); \
ROL64(b02, b02, 3); \
ROL64(b03, b03, 41); \
ROL64(b04, b04, 18); \
ROL64(b10, b10, 1); \
ROL64(b11, b11, 44); \
ROL64(b12, b12, 10); \
ROL64(b13, b13, 45); \
ROL64(b14, b14, 2); \
ROL64(b20, b20, 62); \
ROL64(b21, b21, 6); \
ROL64(b22, b22, 43); \
ROL64(b23, b23, 15); \
ROL64(b24, b24, 61); \
ROL64(b30, b30, 28); \
ROL64(b31, b31, 55); \
ROL64(b32, b32, 25); \
ROL64(b33, b33, 21); \
ROL64(b34, b34, 56); \
ROL64(b40, b40, 27); \
ROL64(b41, b41, 20); \
ROL64(b42, b42, 39); \
ROL64(b43, b43, 8); \
ROL64(b44, b44, 14); \
} while (0)
/*
* The KHI macro integrates the "lane complement" optimization. On input,
* some words are complemented:
* a00 a01 a02 a04 a13 a20 a21 a22 a30 a33 a34 a43
* On output, the following words are complemented:
* a04 a10 a20 a22 a23 a31
*
* The (implicit) permutation and the theta expansion will bring back
* the input mask for the next round.
*/
/* d = a ^ ( b | c ) */
#define KHI_XO(d, a, b, c) do { \
DECL64(kt); \
OR64(kt, b, c); \
XOR64(d, a, kt); \
} while (0)
/* d = a ^ ( b & c ) */
#define KHI_XA(d, a, b, c) do { \
DECL64(kt); \
AND64(kt, b, c); \
XOR64(d, a, kt); \
} while (0)
/*
 * Processes the 25 lanes as five sets of five (b?0, b?1, ..., b?4). For each
 * set the five results c0..c4 are computed from the old lane values before
 * any lane of that set is overwritten; bnn holds the one explicit complement
 * each set needs.
 */
#define KHI(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
b40, b41, b42, b43, b44) \
do { \
DECL64(c0); \
DECL64(c1); \
DECL64(c2); \
DECL64(c3); \
DECL64(c4); \
DECL64(bnn); \
NOT64(bnn, b20); \
KHI_XO(c0, b00, b10, b20); \
KHI_XO(c1, b10, bnn, b30); \
KHI_XA(c2, b20, b30, b40); \
KHI_XO(c3, b30, b40, b00); \
KHI_XA(c4, b40, b00, b10); \
MOV64(b00, c0); \
MOV64(b10, c1); \
MOV64(b20, c2); \
MOV64(b30, c3); \
MOV64(b40, c4); \
NOT64(bnn, b41); \
KHI_XO(c0, b01, b11, b21); \
KHI_XA(c1, b11, b21, b31); \
KHI_XO(c2, b21, b31, bnn); \
KHI_XO(c3, b31, b41, b01); \
KHI_XA(c4, b41, b01, b11); \
MOV64(b01, c0); \
MOV64(b11, c1); \
MOV64(b21, c2); \
MOV64(b31, c3); \
MOV64(b41, c4); \
NOT64(bnn, b32); \
KHI_XO(c0, b02, b12, b22); \
KHI_XA(c1, b12, b22, b32); \
KHI_XA(c2, b22, bnn, b42); \
KHI_XO(c3, bnn, b42, b02); \
KHI_XA(c4, b42, b02, b12); \
MOV64(b02, c0); \
MOV64(b12, c1); \
MOV64(b22, c2); \
MOV64(b32, c3); \
MOV64(b42, c4); \
NOT64(bnn, b33); \
KHI_XA(c0, b03, b13, b23); \
KHI_XO(c1, b13, b23, b33); \
KHI_XO(c2, b23, bnn, b43); \
KHI_XA(c3, bnn, b43, b03); \
KHI_XO(c4, b43, b03, b13); \
MOV64(b03, c0); \
MOV64(b13, c1); \
MOV64(b23, c2); \
MOV64(b33, c3); \
MOV64(b43, c4); \
NOT64(bnn, b14); \
KHI_XA(c0, b04, bnn, b24); \
KHI_XO(c1, bnn, b24, b34); \
KHI_XA(c2, b24, b34, b44); \
KHI_XO(c3, b34, b44, b04); \
KHI_XA(c4, b44, b04, b14); \
MOV64(b04, c0); \
MOV64(b14, c1); \
MOV64(b24, c2); \
MOV64(b34, c3); \
MOV64(b44, c4); \
} while (0)
/* Iota step: XOR the round constant vector r into lane a00. */
#define IOTA(r) XOR64_IOTA(a00, a00, r)
/*
 * Lane orderings. KF_ELT pastes P<r> as the 25-argument list of THETA, RHO
 * and KHI, so the lane permutation is applied by renaming arguments rather
 * than by moving data. P0 is the identity ordering; a00 is first in every
 * list.
 */
#define P0 a00, a01, a02, a03, a04, a10, a11, a12, a13, a14, a20, a21, \
a22, a23, a24, a30, a31, a32, a33, a34, a40, a41, a42, a43, a44
#define P1 a00, a30, a10, a40, a20, a11, a41, a21, a01, a31, a22, a02, \
a32, a12, a42, a33, a13, a43, a23, a03, a44, a24, a04, a34, a14
#define P2 a00, a33, a11, a44, a22, a41, a24, a02, a30, a13, a32, a10, \
a43, a21, a04, a23, a01, a34, a12, a40, a14, a42, a20, a03, a31
#define P3 a00, a23, a41, a14, a32, a24, a42, a10, a33, a01, a43, a11, \
a34, a02, a20, a12, a30, a03, a21, a44, a31, a04, a22, a40, a13
#define P4 a00, a12, a24, a31, a43, a42, a04, a11, a23, a30, a34, a41, \
a03, a10, a22, a21, a33, a40, a02, a14, a13, a20, a32, a44, a01
#define P5 a00, a21, a42, a13, a34, a04, a20, a41, a12, a33, a03, a24, \
a40, a11, a32, a02, a23, a44, a10, a31, a01, a22, a43, a14, a30
#define P6 a00, a02, a04, a01, a03, a20, a22, a24, a21, a23, a40, a42, \
a44, a41, a43, a10, a12, a14, a11, a13, a30, a32, a34, a31, a33
#define P7 a00, a10, a20, a30, a40, a22, a32, a42, a02, a12, a44, a04, \
a14, a24, a34, a11, a21, a31, a41, a01, a33, a43, a03, a13, a23
#define P8 a00, a11, a22, a33, a44, a32, a43, a04, a10, a21, a14, a20, \
a31, a42, a03, a41, a02, a13, a24, a30, a23, a34, a40, a01, a12
#define P9 a00, a41, a32, a23, a14, a43, a34, a20, a11, a02, a31, a22, \
a13, a04, a40, a24, a10, a01, a42, a33, a12, a03, a44, a30, a21
#define P10 a00, a24, a43, a12, a31, a34, a03, a22, a41, a10, a13, a32, \
a01, a20, a44, a42, a11, a30, a04, a23, a21, a40, a14, a33, a02
#define P11 a00, a42, a34, a21, a13, a03, a40, a32, a24, a11, a01, a43, \
a30, a22, a14, a04, a41, a33, a20, a12, a02, a44, a31, a23, a10
#define P12 a00, a04, a03, a02, a01, a40, a44, a43, a42, a41, a30, a34, \
a33, a32, a31, a20, a24, a23, a22, a21, a10, a14, a13, a12, a11
#define P13 a00, a20, a40, a10, a30, a44, a14, a34, a04, a24, a33, a03, \
a23, a43, a13, a22, a42, a12, a32, a02, a11, a31, a01, a21, a41
#define P14 a00, a22, a44, a11, a33, a14, a31, a03, a20, a42, a23, a40, \
a12, a34, a01, a32, a04, a21, a43, a10, a41, a13, a30, a02, a24
#define P15 a00, a32, a14, a41, a23, a31, a13, a40, a22, a04, a12, a44, \
a21, a03, a30, a43, a20, a02, a34, a11, a24, a01, a33, a10, a42
#define P16 a00, a43, a31, a24, a12, a13, a01, a44, a32, a20, a21, a14, \
a02, a40, a33, a34, a22, a10, a03, a41, a42, a30, a23, a11, a04
#define P17 a00, a34, a13, a42, a21, a01, a30, a14, a43, a22, a02, a31, \
a10, a44, a23, a03, a32, a11, a40, a24, a04, a33, a12, a41, a20
#define P18 a00, a03, a01, a04, a02, a30, a33, a31, a34, a32, a10, a13, \
a11, a14, a12, a40, a43, a41, a44, a42, a20, a23, a21, a24, a22
#define P19 a00, a40, a30, a20, a10, a33, a23, a13, a03, a43, a11, a01, \
a41, a31, a21, a44, a34, a24, a14, a04, a22, a12, a02, a42, a32
#define P20 a00, a44, a33, a22, a11, a23, a12, a01, a40, a34, a41, a30, \
a24, a13, a02, a14, a03, a42, a31, a20, a32, a21, a10, a04, a43
#define P21 a00, a14, a23, a32, a41, a12, a21, a30, a44, a03, a24, a33, \
a42, a01, a10, a31, a40, a04, a13, a22, a43, a02, a11, a20, a34
#define P22 a00, a31, a12, a43, a24, a21, a02, a33, a14, a40, a42, a23, \
a04, a30, a11, a13, a44, a20, a01, a32, a34, a10, a41, a22, a03
#define P23 a00, a13, a21, a34, a42, a02, a10, a23, a31, a44, a04, a12, \
a20, a33, a41, a01, a14, a22, a30, a43, a03, a11, a24, a32, a40
/*
 * Physically moves lane contents: eight independent 3-lane rotations through
 * the temporary t (a00 is not touched). KECCAK_F_1600 runs this after every
 * block of eight rounds, whose last KHI used ordering P8, before starting
 * again with P0 -- presumably to bring the stored lanes back to the P0
 * ordering, as the name says; confirm against the permutation tables.
 */
#define P8_TO_P0 do { \
DECL64(t); \
MOV64(t, a01); \
MOV64(a01, a11); \
MOV64(a11, a43); \
MOV64(a43, t); \
MOV64(t, a02); \
MOV64(a02, a22); \
MOV64(a22, a31); \
MOV64(a31, t); \
MOV64(t, a03); \
MOV64(a03, a33); \
MOV64(a33, a24); \
MOV64(a24, t); \
MOV64(t, a04); \
MOV64(a04, a44); \
MOV64(a44, a12); \
MOV64(a12, t); \
MOV64(t, a10); \
MOV64(a10, a32); \
MOV64(a32, a13); \
MOV64(a13, t); \
MOV64(t, a14); \
MOV64(a14, a21); \
MOV64(a21, a20); \
MOV64(a20, t); \
MOV64(t, a23); \
MOV64(a23, a42); \
MOV64(a42, a40); \
MOV64(a40, t); \
MOV64(t, a30); \
MOV64(a30, a41); \
MOV64(a41, a34); \
MOV64(a34, t); \
} while (0)
/*
 * LPAR/RPAR stand in for literal parentheses so that the pasted P<r> list is
 * expanded into 25 separate arguments before THETA/RHO/KHI are invoked.
 */
#define LPAR (
#define RPAR )
/*
 * One round: THETA and RHO with lane ordering P<r>, KHI with ordering P<s>,
 * then IOTA with the round-constant vector k.
 */
#define KF_ELT(r, s, k) do { \
THETA LPAR P ## r RPAR; \
RHO LPAR P ## r RPAR; \
KHI LPAR P ## s RPAR; \
IOTA(k); \
} while (0)
#define DO(x) x
#define KECCAK_F_1600 DO(KECCAK_F_1600_)
/*
 * The full permutation: three passes (j = 0, 8, 16) of eight rounds each,
 * 24 rounds in total. Each round broadcasts RC[j + i] to all four 64-bit
 * elements; P8_TO_P0 runs at the end of every pass. Operates on the context
 * `kc` in scope at the expansion site.
 */
#define KECCAK_F_1600_ do { \
int j; \
for (j = 0; j < 24; j += 8) \
{ \
KF_ELT( 0, 1, (_mm256_set_epi64x( RC[j + 0], RC[j + 0], \
RC[j + 0], RC[j + 0])) ); \
KF_ELT( 1, 2, (_mm256_set_epi64x( RC[j + 1], RC[j + 1], \
RC[j + 1], RC[j + 1])) ); \
KF_ELT( 2, 3, (_mm256_set_epi64x( RC[j + 2], RC[j + 2], \
RC[j + 2], RC[j + 2])) ); \
KF_ELT( 3, 4, (_mm256_set_epi64x( RC[j + 3], RC[j + 3], \
RC[j + 3], RC[j + 3])) ); \
KF_ELT( 4, 5, (_mm256_set_epi64x( RC[j + 4], RC[j + 4], \
RC[j + 4], RC[j + 4])) ); \
KF_ELT( 5, 6, (_mm256_set_epi64x( RC[j + 5], RC[j + 5], \
RC[j + 5], RC[j + 5])) ); \
KF_ELT( 6, 7, (_mm256_set_epi64x( RC[j + 6], RC[j + 6], \
RC[j + 6], RC[j + 6])) ); \
KF_ELT( 7, 8, (_mm256_set_epi64x( RC[j + 7], RC[j + 7], \
RC[j + 7], RC[j + 7])) ); \
P8_TO_P0; \
} \
} while (0)
/*
 * Reset a 4-way context: clear all 25 state vectors, pre-complement the six
 * lanes required by the "lane complement" representation, and reset the
 * buffer position. out_size is the digest size in bits; the stored limit is
 * 200 - out_size/4 bytes.
 */
static void keccak64_init( keccak64_ctx_m256i *kc, unsigned out_size )
{
    /* Lanes that start out as all-ones for the "lane complement". */
    static const int complemented[] = { 1, 2, 8, 12, 17, 20 };
    const int num_complemented =
        (int)( sizeof complemented / sizeof complemented[0] );

    for ( int lane = 0; lane < 25; lane++ )
        kc->w[lane] = _mm256_setzero_si256();

    for ( int k = 0; k < num_complemented; k++ )
        kc->w[ complemented[k] ] = mm256_neg1;

    kc->ptr = 0;
    kc->lim = 200 - (out_size >> 2);
}
/*
 * Absorb `len` bytes per lane of interleaved input into the context.
 *
 * kc    context; kc->ptr is the number of bytes per lane already buffered
 * data  input as an array of __m256i vectors
 * len   byte count per lane; every use shifts it right by 3 to get a vector
 *       count, so it is expected to be a multiple of 8
 * lim   block size in bytes per lane; the permutation runs each time the
 *       buffer reaches it
 *
 * The local names `kc` and `buf` are referenced by INPUT_BUF and
 * KECCAK_F_1600 and must not be renamed.
 */
static void
keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
size_t lim )
{
__m256i *buf;
__m256i *vdata = (__m256i*)data;
size_t ptr;
DECL_STATE
buf = kc->buf;
ptr = kc->ptr;
// Not enough input to complete a block: just buffer it.
if ( len < (lim - ptr) )
{
memcpy_m256i( buf + (ptr>>3), vdata, len>>3 );
kc->ptr = ptr + len;
return;
}
READ_STATE( kc );
while ( len > 0 )
{
size_t clen;
// Take as much as fits in the current block.
clen = (lim - ptr);
if ( clen > len )
clen = len;
memcpy_m256i( buf + (ptr>>3), vdata, clen>>3 );
ptr += clen;
vdata = vdata + (clen>>3);
len -= clen;
// Block complete: XOR it into the state and permute.
if ( ptr == lim )
{
INPUT_BUF( lim );
KECCAK_F_1600;
ptr = 0;
}
}
WRITE_STATE( kc );
kc->ptr = ptr;
}
/*
 * Pad the buffered input, run the final block and write the digest.
 *
 * kc        context to finish
 * dst       receives byte_len/8 vectors copied from the start of the state
 * byte_len  digest size in bytes per lane
 * lim       block size in bytes per lane, as passed to keccak64_core
 *
 * The context is not re-initialised here.
 */
static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
size_t lim )
{
unsigned eb;
// NOTE(review): tmp is a variable-length array inside a union, which is a
// compiler extension rather than standard C -- confirm the supported
// toolchains accept it. It is sized in vectors from a byte count (lim + 1).
union {
__m256i tmp[lim + 1];
sph_u64 dummy; /* for alignment */
} u;
size_t j;
size_t m256_len = byte_len >> 3;
// Evaluates to 1: the first padding word.
eb = 0x100 >> 8;
if ( kc->ptr == (lim - 8) )
{
// Only one 64-bit word left in the block: first and last padding words
// share it.
uint64_t t = eb | 0x8000000000000000;
u.tmp[0] = _mm256_set_epi64x( t, t, t, t );
j = 8;
}
else
{
// eb in the first word, zeros in between, top bit set in the last word
// of the block.
j = lim - kc->ptr;
u.tmp[0] = _mm256_set_epi64x( eb, eb, eb, eb );
memset_zero_m256i( u.tmp + 1, (j>>3) - 2 );
u.tmp[ (j>>3) - 1] = _mm256_set_epi64x( 0x8000000000000000,
0x8000000000000000, 0x8000000000000000, 0x8000000000000000);
}
// The padding completes the block, so this call runs the permutation.
keccak64_core( kc, u.tmp, j, lim );
/* Finalize the "lane complement" */
NOT64( kc->w[ 1], kc->w[ 1] );
NOT64( kc->w[ 2], kc->w[ 2] );
NOT64( kc->w[ 8], kc->w[ 8] );
NOT64( kc->w[12], kc->w[12] );
NOT64( kc->w[17], kc->w[17] );
NOT64( kc->w[20], kc->w[20] );
// Copy the leading state vectors out through tmp.
for ( j = 0; j < m256_len; j++ )
u.tmp[j] = kc->w[j];
memcpy_m256i( dst, u.tmp, m256_len );
}
void keccak256_4way_init( void *kc )
{
keccak64_init( kc, 256 );
}
void
keccak256_4way(void *cc, const void *data, size_t len)
{
keccak64_core(cc, data, len, 136);
}
void
keccak256_4way_close(void *cc, void *dst)
{
keccak64_close(cc, dst, 32, 136);
}
void keccak512_4way_init( void *kc )
{
keccak64_init( kc, 512 );
}
void
keccak512_4way(void *cc, const void *data, size_t len)
{
keccak64_core(cc, data, len, 72);
}
void
keccak512_4way_close(void *cc, void *dst)
{
keccak64_close(cc, dst, 64, 72);
}
#endif

View File

@@ -0,0 +1,94 @@
/* $Id: sph_keccak.h 216 2010-06-08 09:46:57Z tp $ */
/**
* 4-way (AVX2) Keccak interface. This is the interface for Keccak with
* the recommended parameters for SHA-3; this header declares the output
* lengths 256 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file keccak-hash-4way.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef KECCAK_HASH_4WAY_H__
#define KECCAK_HASH_4WAY_H__
#ifdef __cplusplus
extern "C"{
#endif
#ifdef __AVX2__
#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
#define SPH_SIZE_keccak256 256
/**
* Output size (in bits) for Keccak-512.
*/
#define SPH_SIZE_keccak512 512
/**
* This structure is a context for Keccak computations: it contains the
* intermediate values and some data from the last entered block. Once a
* Keccak computation has been performed, the context can be reused for
* another computation.
*
* The contents of this structure are private. A running Keccak computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
// Pending input, one __m256i per 8 bytes of each lane; keccak64_core indexes
// it with ptr>>3.
// NOTE(review): 144*8 vectors is far more than the ptr>>3 < 136/8 indices
// reachable with the block sizes used by the wrappers in
// keccak-hash-4way.c (136 and 72) -- confirm whether 144/8 was intended.
__m256i buf[144*8]; /* first field, for alignment */
// The 25 state vectors, accessed through the aXY macros.
__m256i w[25];
// ptr: bytes per lane currently buffered; lim: set by keccak64_init to
// 200 - out_size/4.
size_t ptr, lim;
// sph_u64 wide[25];
} keccak64_ctx_m256i;
// Both digest sizes share the same context layout.
typedef keccak64_ctx_m256i keccak256_4way_context;
typedef keccak64_ctx_m256i keccak512_4way_context;
// Keccak-256, 4-way: cc points to a keccak256_4way_context; len is the byte
// count per lane.
void keccak256_4way_init(void *cc);
void keccak256_4way(void *cc, const void *data, size_t len);
void keccak256_4way_close(void *cc, void *dst);
// Keccak-512, 4-way: cc points to a keccak512_4way_context.
void keccak512_4way_init(void *cc);
void keccak512_4way(void *cc, const void *data, size_t len);
void keccak512_4way_close(void *cc, void *dst);
// NOTE(review): no definition of this function appears in
// keccak-hash-4way.c -- confirm it is provided elsewhere or unused.
void keccak512_4way_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdlib.h>
@@ -51,17 +50,3 @@ int scanhash_keccak(int thr_id, struct work *work,
return 0;
}
void keccak_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (128.0 * opt_diff_factor) );
}
bool register_keccak_algo( algo_gate_t* gate )
{
gate->scanhash = (void*)&scanhash_keccak;
gate->hash = (void*)&keccakhash;
gate->gen_merkle_root = (void*)&SHA256_gen_merkle_root;
gate->set_target = (void*)&keccak_set_target;
return true;
};

View File

@@ -955,6 +955,7 @@ static const struct {
#endif
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
DECL64(tt0); \
DECL64(tt1); \
@@ -1643,8 +1644,7 @@ keccak_core(sph_keccak_context *kc, const void *data, size_t len, size_t lim)
for (j = 0; j < d; j += 8) \
sph_enc64le_aligned(u.tmp + j, kc->u.wide[j >> 3]); \
memcpy(dst, u.tmp, d); \
keccak_init(kc, (unsigned)d << 3); \
} \
}
#else

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdlib.h>
#include <stdint.h>
@@ -6,9 +5,7 @@
#include <stdio.h>
#include "ripemd/sph_ripemd.h"
#include "sha/sph_sha2.h"
#if defined __SHA__
#include <openssl/sha.h>
#endif
#include <openssl/sha.h>
#define LBRY_NTIME_INDEX 25
#define LBRY_NBITS_INDEX 26
@@ -19,18 +16,19 @@
void lbry_hash(void* output, const void* input)
{
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_CTX ctx_sha256 __attribute__ ((aligned (64)));
SHA512_CTX ctx_sha512 __attribute__ ((aligned (64)));
#else
sph_sha256_context ctx_sha256 __attribute__ ((aligned (64)));
#endif
sph_sha512_context ctx_sha512 __attribute__ ((aligned (64)));
#endif
sph_ripemd160_context ctx_ripemd __attribute__ ((aligned (64)));
uint32_t _ALIGN(64) hashA[16];
uint32_t _ALIGN(64) hashB[16];
uint32_t _ALIGN(64) hashC[16];
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, input, 112 );
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
@@ -38,6 +36,10 @@ void lbry_hash(void* output, const void* input)
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, hashA, 32 );
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
SHA512_Init( &ctx_sha512 );
SHA512_Update( &ctx_sha512, hashA, 32 );
SHA512_Final( (unsigned char*) hashA, &ctx_sha512 );
#else
sph_sha256_init( &ctx_sha256 );
sph_sha256 ( &ctx_sha256, input, 112 );
@@ -46,11 +48,11 @@ void lbry_hash(void* output, const void* input)
sph_sha256_init( &ctx_sha256 );
sph_sha256 ( &ctx_sha256, hashA, 32 );
sph_sha256_close( &ctx_sha256, hashA );
#endif
sph_sha512_init( &ctx_sha512 );
sph_sha512 ( &ctx_sha512, hashA, 32 );
sph_sha512_close( &ctx_sha512, hashA );
sph_sha512_close( &ctx_sha512, hashA );
#endif
sph_ripemd160_init( &ctx_ripemd );
sph_ripemd160 ( &ctx_ripemd, hashA, 32 );
@@ -60,7 +62,7 @@ void lbry_hash(void* output, const void* input)
sph_ripemd160 ( &ctx_ripemd, hashA+8, 32 );
sph_ripemd160_close( &ctx_ripemd, hashC );
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, hashB, 20 );
SHA256_Update( &ctx_sha256, hashC, 20 );
@@ -219,7 +221,7 @@ int64_t lbry_get_max64() { return 0x1ffffLL; }
bool register_lbry_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_lbry;
gate->hash = (void*)&lbry_hash;
gate->calc_network_diff = (void*)&lbry_calc_network_diff;

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdlib.h>

View File

@@ -21,8 +21,9 @@
#define LYRA2_H_
#include <stdint.h>
#include "algo/sha/sha3-defs.h"
typedef unsigned char byte;
//typedef unsigned char byte;
//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED)
#define BLOCK_LEN_BLAKE2_SAFE_INT64 8 //512 bits (=64 bytes, =8 uint64_t)

View File

@@ -1,6 +1,5 @@
#include <memory.h>
#include "miner.h"
#include "algo/blake/sph_blake.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/skein/sph_skein.h"

View File

@@ -1,6 +1,5 @@
#include <memory.h>
#include "miner.h"
#include "algo-gate-api.h"
#include "algo/blake/sph_blake.h"

View File

@@ -1,5 +1,4 @@
#include <memory.h>
#include "miner.h"
#include "algo-gate-api.h"
#include "lyra2.h"
#include "avxdefs.h"

View File

@@ -1,6 +1,5 @@
#include <memory.h>
#include <mm_malloc.h>
#include "miner.h"
#include "algo-gate-api.h"
#include "lyra2.h"
#include "algo/blake/sph_blake.h"

View File

@@ -1,5 +1,4 @@
#include "cpuminer-config.h"
#include "miner.h"
#include "algo-gate-api.h"
#include <gmp.h>
@@ -14,9 +13,7 @@
#include "algo/tiger/sph_tiger.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/ripemd/sph_ripemd.h"
#if defined __SHA__
#include <openssl/sha.h>
#endif
#include <openssl/sha.h>
#define EPSa DBL_EPSILON
@@ -120,12 +117,13 @@ uint32_t sw2_(int nnounce)
}
typedef struct {
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_CTX sha256;
SHA512_CTX sha512;
#else
sph_sha256_context sha256;
#endif
sph_sha512_context sha512;
#endif
sph_keccak512_context keccak;
sph_whirlpool_context whirlpool;
sph_haval256_5_context haval;
@@ -137,12 +135,13 @@ m7m_ctx_holder m7m_ctx;
void init_m7m_ctx()
{
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_Init( &m7m_ctx.sha256 );
SHA512_Init( &m7m_ctx.sha512 );
#else
sph_sha256_init( &m7m_ctx.sha256 );
#endif
sph_sha512_init( &m7m_ctx.sha512 );
#endif
sph_keccak512_init( &m7m_ctx.keccak );
sph_whirlpool_init( &m7m_ctx.whirlpool );
sph_haval256_5_init( &m7m_ctx.haval );
@@ -177,7 +176,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
m7m_ctx_holder ctx1, ctx2 __attribute__ ((aligned (64)));
memcpy( &ctx1, &m7m_ctx, sizeof(m7m_ctx) );
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_CTX ctxf_sha256;
#else
sph_sha256_context ctxf_sha256;
@@ -185,18 +184,20 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
memcpy(data, pdata, 80);
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_Update( &ctx1.sha256, data, M7_MIDSTATE_LEN );
SHA512_Update( &ctx1.sha512, data, M7_MIDSTATE_LEN );
#else
sph_sha256( &ctx1.sha256, data, M7_MIDSTATE_LEN );
#endif
sph_sha512( &ctx1.sha512, data, M7_MIDSTATE_LEN );
#endif
sph_keccak512( &ctx1.keccak, data, M7_MIDSTATE_LEN );
sph_whirlpool( &ctx1.whirlpool, data, M7_MIDSTATE_LEN );
sph_haval256_5( &ctx1.haval, data, M7_MIDSTATE_LEN );
sph_tiger( &ctx1.tiger, data, M7_MIDSTATE_LEN );
sph_ripemd160( &ctx1.ripemd, data, M7_MIDSTATE_LEN );
// the following calculations can be performed once and the results shared
mpz_t magipi, magisw, product, bns0, bns1;
mpf_t magifpi, magifpi0, mpt1, mpt2, mptmp, mpten;
@@ -221,16 +222,22 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
memcpy( &ctx2, &ctx1, sizeof(m7m_ctx) );
#if defined __SHA__
// with 4 way can a single midstate be shared among lanes?
// do single round of midstate and interleave for final
#ifndef USE_SPH_SHA
SHA256_Update( &ctx2.sha256, data_p64, 80 - M7_MIDSTATE_LEN );
SHA256_Final( (unsigned char*) (bhash[0]), &ctx2.sha256 );
SHA512_Update( &ctx2.sha512, data_p64, 80 - M7_MIDSTATE_LEN );
SHA512_Final( (unsigned char*) (bhash[1]), &ctx2.sha512 );
#else
sph_sha256( &ctx2.sha256, data_p64, 80 - M7_MIDSTATE_LEN );
sph_sha256_close( &ctx2.sha256, (void*)(bhash[0]) );
#endif
sph_sha512( &ctx2.sha512, data_p64, 80 - M7_MIDSTATE_LEN );
sph_sha512_close( &ctx2.sha512, (void*)(bhash[1]) );
#endif
sph_keccak512( &ctx2.keccak, data_p64, 80 - M7_MIDSTATE_LEN );
sph_keccak512_close( &ctx2.keccak, (void*)(bhash[2]) );
@@ -246,6 +253,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
sph_ripemd160( &ctx2.ripemd, data_p64, 80 - M7_MIDSTATE_LEN );
sph_ripemd160_close( &ctx2.ripemd, (void*)(bhash[6]) );
// 4 way serial
mpz_import(bns0, a, -1, p, -1, 0, bhash[0]);
mpz_set(bns1, bns0);
mpz_set(product, bns0);
@@ -261,7 +269,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
bytes = mpz_sizeinbase(product, 256);
mpz_export((void *)bdata, NULL, -1, 1, 0, 0, product);
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_Init( &ctxf_sha256 );
SHA256_Update( &ctxf_sha256, bdata, bytes );
SHA256_Final( (unsigned char*) hash, &ctxf_sha256 );
@@ -271,6 +279,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
sph_sha256_close( &ctxf_sha256, (void*)(hash) );
#endif
// do once and share
digits=(int)((sqrt((double)(n/2))*(1.+EPS))/9000+75);
mp_bitcnt_t prec = (long int)(digits*BITS_PER_DIGIT+16);
mpf_set_prec_raw(magifpi, prec);
@@ -293,7 +302,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
mpz_set_f(magipi, magifpi);
mpz_add(magipi,magipi,magisw);
mpz_add(product,product,magipi);
// share magipi, product and do serial
mpz_import(bns0, b, -1, p, -1, 0, (void*)(hash));
mpz_add(bns1, bns1, bns0);
mpz_mul(product,product,bns1);
@@ -303,7 +312,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
mpzscale=bytes;
mpz_export(bdata, NULL, -1, 1, 0, 0, product);
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_Init( &ctxf_sha256 );
SHA256_Update( &ctxf_sha256, bdata, bytes );
SHA256_Final( (unsigned char*) hash, &ctxf_sha256 );
@@ -314,6 +323,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
#endif
}
// this is the scanhash part
const unsigned char *hash_ = (const unsigned char *)hash;
const unsigned char *target_ = (const unsigned char *)ptarget;
for ( i = 31; i >= 0; i-- )
@@ -343,6 +353,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work,
pdata[19] = n;
// do this in hashm7m
out:
mpf_set_prec_raw(magifpi, prec0);
mpf_set_prec_raw(magifpi0, prec0);
@@ -361,21 +372,17 @@ out:
return rc;
}
void m7m_reverse_endian( struct work *work )
{
swab32_array( work->data, work->data, 20 );
}
bool register_m7m_algo( algo_gate_t *gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | SHA_OPT;
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
init_m7m_ctx();
gate->scanhash = (void*)scanhash_m7m_hash;
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&get_max64_0x1ffff;
gate->set_work_data_endian = (void*)&m7m_reverse_endian;
gate->work_data_size = 80;
gate->set_work_data_endian = (void*)&set_work_data_big_endian;
return true;
}

View File

@@ -31,7 +31,6 @@
#include <string.h>
#include <unistd.h>
#include "miner.h"
#include "algo-gate-api.h"
#define USE_CUSTOM_BLAKE2S
@@ -1089,7 +1088,9 @@ bool register_neoscrypt_algo( algo_gate_t* gate )
gate->set_target = (void*)&scrypt_set_target;
gate->wait_for_diff = (void*)&neoscrypt_wait_for_diff;
gate->build_stratum_request = (void*)&std_be_build_stratum_request;
gate->set_work_data_endian = (void*)&swab_work_data;
gate->work_decode = (void*)&std_be_work_decode;
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
gate->set_work_data_endian = (void*)&set_work_data_big_endian;
gate->work_data_size = 80;
return true;
};

178
algo/nist5/nist5-4way.c Normal file
View File

@@ -0,0 +1,178 @@
#include "nist5-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#if defined(NIST5_4WAY)
#include "algo/blake/blake-hash-4way.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
#include "algo/keccak/keccak-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
// no improvement with midstate
//static __thread blake512_4way_context ctx_mid;
/*
 * NIST5 hash of four block headers in parallel.
 *
 * The chain is Blake-512 -> Groestl-512 -> JH-512 -> Keccak-512 -> Skein-512.
 * Blake, JH, Keccak and Skein run on 4-way interleaved data.  Groestl uses
 * the single-lane AES-NI implementation, so the lanes are deinterleaved,
 * hashed one after another and interleaved again.
 *
 * output: receives 4 x 32 bytes: the first 32 bytes of each lane's final
 *         digest, lane 0 first.
 * input:  4-way interleaved headers; 80 bytes are hashed per lane.
 */
void nist5hash_4way( void *output, const void *input )
{
     uint64_t hash0[8] __attribute__ ((aligned (64)));
     uint64_t hash1[8] __attribute__ ((aligned (64)));
     uint64_t hash2[8] __attribute__ ((aligned (64)));
     uint64_t hash3[8] __attribute__ ((aligned (64)));
     uint64_t vhash[8*4] __attribute__ ((aligned (64)));
     uint64_t *const lane[4] = { hash0, hash1, hash2, hash3 };
     /* Byte pointer: arithmetic on void* is a GNU extension, not ISO C. */
     char *out = (char*)output;
     blake512_4way_context ctx_blake;
     hashState_groestl ctx_groestl;
     jh512_4way_context ctx_jh;
     skein512_4way_context ctx_skein;
     keccak512_4way_context ctx_keccak;

     /* A precomputed Blake midstate was tried and gave no improvement, so
      * the full 80 bytes are hashed on every call. */
     blake512_4way_init( &ctx_blake );
     blake512_4way( &ctx_blake, input, 80 );
     blake512_4way_close( &ctx_blake, vhash );

     m256_deinterleave_4x64x( hash0, hash1, hash2, hash3, vhash, 512 );

     /* Groestl, one lane at a time, in place. */
     for ( int i = 0; i < 4; i++ )
     {
        init_groestl( &ctx_groestl, 64 );
        update_and_final_groestl( &ctx_groestl, (char*)lane[i],
                                  (const char*)lane[i], 512 );
     }

     m256_interleave_4x64x( vhash, hash0, hash1, hash2, hash3, 512 );

     jh512_4way_init( &ctx_jh );
     jh512_4way( &ctx_jh, vhash, 64 );
     jh512_4way_close( &ctx_jh, vhash );

     keccak512_4way_init( &ctx_keccak );
     keccak512_4way( &ctx_keccak, vhash, 64 );
     keccak512_4way_close( &ctx_keccak, vhash );

     skein512_4way_init( &ctx_skein );
     skein512_4way( &ctx_skein, vhash, 64 );
     skein512_4way_close( &ctx_skein, vhash );

     m256_deinterleave_4x64x( hash0, hash1, hash2, hash3, vhash, 512 );

     /* Only the first 256 bits of each lane are returned. */
     for ( int i = 0; i < 4; i++ )
        memcpy( out + 32*i, lane[i], 32 );
}
/*
 * Scan nonces for NIST5, four at a time.
 *
 * thr_id:      index into work_restart[] used to detect a restart request.
 * work:        work item; data[19] is the starting nonce and is updated to
 *              the base nonce of the last batch hashed.  work->nfound[] and
 *              work->nonces[] report which of the four lanes hit and with
 *              which nonce.
 * max_nonce:   scanning stops once the next base nonce reaches this value.
 * hashes_done: receives the number of nonces hashed by this call.
 *
 * Returns the number of lanes whose hash met the target in the final batch
 * (0 if the range was exhausted or a restart was requested).
 */
int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done)
{
     /* Quick pre-filter: for a target word Htarg <= htmax[i], a candidate
      * must have all bits of masks[i] clear in hash word 7. */
     static const uint32_t htmax[] = { 0,
                                       0xF,
                                       0xFF,
                                       0xFFF,
                                       0xFFFF,
                                       0x10000000 };
     static const uint32_t masks[] = { 0xFFFFFFFF,
                                       0xFFFFFFF0,
                                       0xFFFFFF00,
                                       0xFFFFF000,
                                       0xFFFF0000,
                                       0 };
     uint32_t hash[4*8] __attribute__ ((aligned (64)));
     uint32_t vdata[24*4] __attribute__ ((aligned (64)));
     uint32_t endiandata[20] __attribute__((aligned(64)));
     uint32_t *pdata = work->data;
     uint32_t *ptarget = work->target;
     const uint32_t first_nonce = pdata[19];
     const uint32_t Htarg = ptarget[7];
     uint32_t n = first_nonce;
     uint32_t *nonces = work->nonces;
     bool *found = work->nfound;
     int num_found = 0;
     /* 32-bit positions of the nonce in the interleaved buffer, one per
      * lane (9*8 + 1, then every second word).
      * NOTE(review): relies on the layout produced by m256_interleave_4x64. */
     uint32_t *const noncep[4] = { vdata + 73, vdata + 75,
                                   vdata + 77, vdata + 79 };
     /* Targets looser than every htmax[] entry get no pre-filter (mask 0)
      * and rely on fulltest() alone.  Previously such targets matched no
      * entry, so nothing was hashed and the nonce never advanced. */
     uint32_t mask = 0;

     for ( int m = 0; m < 6; m++ )
     {
        if ( Htarg <= htmax[m] )
        {
           mask = masks[m];
           break;
        }
     }

     // we need bigendian data...
     swab32_array( endiandata, pdata, 20 );

     uint64_t *edata = (uint64_t*)endiandata;
     /* The same header goes into all four lanes; only the nonce differs. */
     m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );

     do {
        found[0] = found[1] = found[2] = found[3] = false;
        for ( int lane = 0; lane < 4; lane++ )
           be32enc( noncep[lane], n + lane );

        nist5hash_4way( hash, vdata );
        pdata[19] = n;

        /* The hash is returned deinterleaved: 8 words per lane. */
        for ( int lane = 0; lane < 4; lane++ )
        {
           uint32_t *lane_hash = hash + 8*lane;

           if ( !( lane_hash[7] & mask ) && fulltest( lane_hash, ptarget ) )
           {
              found[lane] = true;
              num_found++;
              nonces[lane] = n + lane;
              work_set_target_ratio( work, lane_hash );
           }
        }
        n += 4;
     } while ( ( num_found == 0 ) && ( n < max_nonce )
               && !work_restart[thr_id].restart );

     /* n was already advanced past the last batch, so no +1 here. */
     *hashes_done = n - first_nonce;
     return num_found;
}
#endif

17
algo/nist5/nist5-gate.c Normal file
View File

@@ -0,0 +1,17 @@
#include "nist5-gate.h"
/*
 * Install the NIST5 callbacks in the algorithm gate.
 *
 * With NIST5_4WAY defined the 4-way scanhash/hash pair is installed;
 * otherwise the scalar pair is used and its context is initialised first.
 * Always returns true.
 */
bool register_nist5_algo( algo_gate_t* gate )
{
#if defined (NIST5_4WAY)
    gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
    gate->scanhash      = (void*)&scanhash_nist5_4way;
    gate->hash          = (void*)&nist5hash_4way;
#else
    gate->optimizations = SSE2_OPT | AES_OPT;
    init_nist5_ctx();
    gate->scanhash      = (void*)&scanhash_nist5;
    gate->hash          = (void*)&nist5hash;
#endif
    return true;
}

26
algo/nist5/nist5-gate.h Normal file
View File

@@ -0,0 +1,26 @@
#ifndef __NIST5_GATE_H__
#define __NIST5_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
/* The 4-way NIST5 variant is selected only when FOUR_WAY and __AVX2__ are
 * defined and NO_AES_NI is not. */
#if defined(FOUR_WAY) && defined(__AVX2__) && !defined(NO_AES_NI)
#define NIST5_4WAY
#endif
#if defined(NIST5_4WAY)
/* 4-way variant: hashes four interleaved headers per call. */
void nist5hash_4way( void *state, const void *input );
int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#else
/* Scalar variant: one header per call. */
void nist5hash( void *state, const void *input );
int scanhash_nist5( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
#endif

View File

@@ -1,5 +1,4 @@
#include "miner.h"
#include "algo-gate-api.h"
#include "nist5-gate.h"
#include <stdlib.h>
#include <stdint.h>
@@ -148,7 +147,7 @@ int scanhash_nist5(int thr_id, struct work *work,
pdata[19] = n;
return 0;
}
/*
bool register_nist5_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT;
@@ -157,4 +156,4 @@ bool register_nist5_algo( algo_gate_t* gate )
gate->hash = (void*)&nist5hash;
return true;
};
*/

View File

@@ -25,7 +25,6 @@
*/
#include "cpuminer-config.h"
#include "miner.h"
#include "algo-gate-api.h"
#include <stdlib.h>

View File

@@ -0,0 +1,12 @@
#include "polytimos-gate.h"
/*
 * Install the Polytimos callbacks in the algorithm gate and initialise the
 * shared hash contexts.  Always returns true.
 */
bool register_polytimos_algo( algo_gate_t* gate )
{
    gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
    init_polytimos_context();
    gate->scanhash  = (void*)&scanhash_polytimos;
    gate->hash      = (void*)&polytimos_hash;
    gate->get_max64 = (void*)&get_max64_0x3ffff;
    return true;
}

View File

@@ -0,0 +1,12 @@
#ifndef __POLYTIMOS_GATE_H__
#define __POLYTIMOS_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
/* Hash one 80-byte header; the first 32 bytes of the digest go to state. */
void polytimos_hash( void *state, const void *input );
/* Scan nonces starting at work->data[19]; returns 1 when a hash meets the
 * target, 0 otherwise, and reports the hash count via hashes_done. */
int scanhash_polytimos( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
/* Initialise the hash contexts that polytimos_hash copies on each call. */
void init_polytimos_context();
#endif

115
algo/polytimos/polytimos.c Normal file
View File

@@ -0,0 +1,115 @@
#include "polytimos-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/skein/sph_skein.h"
#include "algo/echo/sph_echo.h"
#include "algo/fugue//sph_fugue.h"
#include "algo/luffa/sse2/luffa_for_sse2.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/gost/sph_gost.h"
#ifndef NO_AES_NI
#include "algo/echo/aes_ni/hash_api.h"
#endif
/* One context per stage of the Polytimos chain.  polytimos_hash() copies
 * the whole struct before each hash instead of re-initialising the stages. */
typedef struct {
sph_skein512_context skein;
sph_shabal512_context shabal;
/* Echo uses the sph implementation when AES-NI is disabled, otherwise the
 * AES-NI one. */
#ifdef NO_AES_NI
sph_echo512_context echo;
#else
hashState_echo echo;
#endif
hashState_luffa luffa;
sph_fugue512_context fugue;
sph_gost512_context gost;
} poly_ctx_holder;
/* Initialised contexts, filled in by init_polytimos_context(). */
poly_ctx_holder poly_ctx;
/*
 * Initialise every stage context in the global poly_ctx.  polytimos_hash()
 * copies poly_ctx on each call, so this must run before the first hash.
 */
void init_polytimos_context( void )
{
    sph_skein512_init( &poly_ctx.skein );
    sph_shabal512_init( &poly_ctx.shabal );
#ifdef NO_AES_NI
    sph_echo512_init( &poly_ctx.echo );
#else
    init_echo( &poly_ctx.echo, 512 );
#endif
    init_luffa( &poly_ctx.luffa, 512 );
    sph_fugue512_init( &poly_ctx.fugue );
    sph_gost512_init( &poly_ctx.gost );
}
/*
 * Polytimos hash: Skein-512 -> Shabal-512 -> Echo-512 -> Luffa-512 ->
 * Fugue-512 -> GOST-512 over an 80-byte input.  Each stage rewrites the same
 * 64-byte buffer; the first 32 bytes of the final digest are written to
 * output.
 */
void polytimos_hash(void *output, const void *input)
{
    poly_ctx_holder stages __attribute__ ((aligned (64)));
    uint32_t digest[16] __attribute__ ((aligned (64)));

    /* Start from the contexts initialised once in poly_ctx. */
    memcpy( &stages, &poly_ctx, sizeof(poly_ctx) );

    /* Skein consumes the 80-byte header. */
    sph_skein512( &stages.skein, input, 80 );
    sph_skein512_close( &stages.skein, digest );

    /* Every later stage hashes the previous 64-byte digest in place. */
    sph_shabal512( &stages.shabal, digest, 64 );
    sph_shabal512_close( &stages.shabal, digest );

#ifdef NO_AES_NI
    sph_echo512( &stages.echo, digest, 64 );
    sph_echo512_close( &stages.echo, digest );
#else
    update_final_echo( &stages.echo, (BitSequence *)digest,
                       (const BitSequence *)digest, 512 );
#endif

    update_and_final_luffa( &stages.luffa, (BitSequence*)digest,
                            (const BitSequence*)digest, 64 );

    sph_fugue512( &stages.fugue, digest, 64 );
    sph_fugue512_close( &stages.fugue, digest );

    sph_gost512( &stages.gost, digest, 64 );
    sph_gost512_close( &stages.gost, digest );

    memcpy( output, digest, 32 );
}
/*
 * Scan nonces for Polytimos, one at a time.
 *
 * thr_id:      index into work_restart[] used to detect a restart request.
 * work:        work item; data[19] is the starting nonce.  On a hit it is
 *              set to the winning nonce, otherwise to the next nonce to try.
 * max_nonce:   scanning stops once the nonce reaches this value.
 * hashes_done: receives the number of nonces hashed by this call.
 *
 * Returns 1 if a hash meeting the target was found, 0 otherwise.
 */
int scanhash_polytimos(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
{
    uint32_t _ALIGN(128) hash[8];
    uint32_t _ALIGN(128) endiandata[20];
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    const uint32_t first_nonce = pdata[19];
    uint32_t nonce = first_nonce;
    volatile uint8_t *restart = &(work_restart[thr_id].restart);

    /* Apply the benchmark override before caching the target word, so the
     * quick check below and fulltest() agree on the target. */
    if (opt_benchmark)
        ptarget[7] = 0x0cff;
    const uint32_t Htarg = ptarget[7];

    // we need bigendian data...
    for (int i = 0; i < 19; i++) {
        be32enc(&endiandata[i], pdata[i]);
    }

    do {
        be32enc(&endiandata[19], nonce);
        polytimos_hash(hash, endiandata);

        /* Cheap test on the top word first, full comparison only if needed. */
        if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
            work_set_target_ratio(work, hash);
            pdata[19] = nonce;
            /* Nonces first_nonce..nonce inclusive have been hashed. */
            *hashes_done = nonce - first_nonce + 1;
            return 1;
        }
        nonce++;
    } while (nonce < max_nonce && !(*restart));

    pdata[19] = nonce;
    /* nonce already points past the last one hashed, so no +1 here. */
    *hashes_done = nonce - first_nonce;
    return 0;
}

View File

@@ -1,5 +1,4 @@
#include "cpuminer-config.h"
#include "miner.h"
#include "algo-gate-api.h"
#include <stdio.h>

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdlib.h>

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdlib.h>

116
algo/s3.c
View File

@@ -1,116 +0,0 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sph_simd.h"
void s3hash(void *output, const void *input)
{
sph_shavite512_context ctx_shavite;
sph_simd512_context ctx_simd;
sph_skein512_context ctx_skein;
unsigned char _ALIGN(128) hash[64];
sph_shavite512_init(&ctx_shavite);
sph_shavite512(&ctx_shavite, input, 80);
sph_shavite512_close(&ctx_shavite, (void*)hash);
sph_simd512_init(&ctx_simd);
sph_simd512(&ctx_simd, (const void*)hash, 64);
sph_simd512_close(&ctx_simd, (void*)hash);
sph_skein512_init(&ctx_skein);
sph_skein512(&ctx_skein, (const void*)hash, 64);
sph_skein512_close(&ctx_skein, (void*)hash);
memcpy(output, hash, 32);
}
/*
 * Scan nonces for S3, one at a time.
 *
 * Starts at work->data[19], stores each nonce tried back into data[19], and
 * stops on a hit, when n reaches max_nonce, or when a restart is requested
 * for thr_id.  hashes_done receives the number of nonces tried.
 * Returns non-zero on a hit, 0 otherwise.
 */
int scanhash_s3(int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done)
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
/* Start one below the first nonce: the loop pre-increments n. */
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(32) hash64[8];
uint32_t endiandata[32];
/* For a target word Htarg <= htmax[i], a candidate must have every bit of
 * masks[i] clear in hash word 7 before the full comparison is run. */
uint64_t htmax[] = {
0,
0xF,
0xFF,
0xFFF,
0xFFFF,
0x10000000
};
uint32_t masks[] = {
0xFFFFFFFF,
0xFFFFFFF0,
0xFFFFFF00,
0xFFFFF000,
0xFFFF0000,
0
};
// we need bigendian data...
/* Converts 32 words although s3hash only reads 80 bytes (20 words). */
for (int kk=0; kk < 32; kk++) {
be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]);
};
#ifdef DEBUG_ALGO
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
/* Pick the first threshold that covers Htarg; if none does, nothing is
 * scanned. */
for (int m=0; m < 6; m++) {
if (Htarg <= htmax[m]) {
uint32_t mask = masks[m];
do {
pdata[19] = ++n;
be32enc(&endiandata[19], n);
s3hash(hash64, endiandata);
#ifndef DEBUG_ALGO
if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
#else
/* Debug build: progress dots from thread 0, and a marker each time the
 * mask pre-filter passes. */
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while (n < max_nonce && !work_restart[thr_id].restart);
// see blake.c if else to understand the loop on htmax => mask
break;
}
}
/* n is the last nonce tried, hence the +1. */
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
/*
 * Install the S3 callbacks in the algorithm gate.  Calls algo_not_tested()
 * first.  Always returns true.
 */
bool register_s3_algo( algo_gate_t* gate )
{
    algo_not_tested();
    gate->scanhash = (void*)&scanhash_s3;
    gate->hash     = (void*)&s3hash;
//  gate->get_max64 = &s3_get_max64;
    return true;
}

View File

@@ -27,7 +27,6 @@
* online backup system.
*/
#include "miner.h"
#include "algo-gate-api.h"
#include <stdlib.h>
@@ -780,7 +779,7 @@ bool register_scrypt_algo( algo_gate_t* gate )
{
gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
gate->scanhash = (void*)&scanhash_scrypt;
gate->hash = (void*)&scrypt_1024_1_1_256_24way;
// gate->hash = (void*)&scrypt_1024_1_1_256_24way;
gate->set_target = (void*)&scrypt_set_target;
gate->get_max64 = (void*)&scrypt_get_max64;

View File

@@ -1,5 +1,3 @@
#include "miner.h"
#include <stdlib.h>
#include <string.h>
#include "inttypes.h"

View File

@@ -8,7 +8,6 @@
* any later version. See COPYING for more details.
*/
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>

View File

@@ -1,16 +1,13 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "sph_sha2.h"
#include <openssl/sha.h>
#if defined __SHA__
#include <openssl/sha.h>
#ifndef USE_SPH_SHA
static SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
static __thread SHA256_CTX sha256t_mid __attribute__ ((aligned (64)));
#else
@@ -21,7 +18,7 @@
void sha256t_midstate( const void* input )
{
memcpy( &sha256t_mid, &sha256t_ctx, sizeof sha256t_mid );
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_Update( &sha256t_mid, input, 64 );
#else
sph_sha256( &sha256t_mid, input, 64 );
@@ -34,7 +31,7 @@ void sha256t_hash(void* output, const void* input, uint32_t len)
const int midlen = 64; // bytes
const int tail = 80 - midlen; // 16
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_CTX ctx_sha256 __attribute__ ((aligned (64)));
memcpy( &ctx_sha256, &sha256t_mid, sizeof sha256t_mid );
@@ -150,12 +147,12 @@ void sha256t_set_target( struct work* work, double job_diff )
bool register_sha256t_algo( algo_gate_t* gate )
{
#if defined __SHA__
#ifndef USE_SPH_SHA
SHA256_Init( &sha256t_ctx );
#else
sph_sha256_init( &sha256t_ctx );
#endif
gate->optimizations = SSE2_OPT | SHA_OPT;
gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256t;
gate->hash = (void*)&sha256t_hash;
gate->set_target = (void*)&sha256t_set_target;

View File

@@ -8,28 +8,12 @@
#define DATA_ALIGN(x) __declspec(align(16)) x
#endif
#include "compat.h"
#include "simd-compat.h"
#include "algo/sha/sha3-defs.h"
/*
* NIST API Specific types.
*/
//typedef unsigned char BitSequence;
//#ifdef HAS_64
// typedef u64 DataLength;
//#else
// typedef unsigned long DataLength;
//#endif
// can't find u32 or fft-t
#include <stdint.h>
typedef uint32_t u32;
typedef int fft_t;
//typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHBITLEN = 2} HashReturn;
typedef struct {
unsigned int hashbitlen;
unsigned int blocksize;

View File

@@ -1,5 +1,5 @@
#ifndef __COMPAT_H__
#define __COMPAT_H__
#ifndef __SIMD_COMPAT_H__
#define __SIMD_COMPAT_H__
#include <limits.h>
@@ -24,14 +24,7 @@
*/
#include <stdint.h>
#ifdef UINT32_MAX
typedef uint32_t u32;
#else
typedef uint_fast32_t u32;
#endif
typedef unsigned long long u64;
#include "algo/sha/brg_types.h"
#define C32(x) ((u32)(x))

120
algo/skein/skein-4way.c Normal file
View File

@@ -0,0 +1,120 @@
#include "skein-gate.h"
#include <string.h>
#include <stdint.h>
#include <openssl/sha.h>
#include "skein-hash-4way.h"
#if defined (__AVX2__)
/*
 * Skein coin hash for four lanes: 4-way Skein-512 over the interleaved
 * 80-byte headers, then a scalar SHA-256 over each lane's 64-byte digest.
 * The four 32-byte results are written back to back to state.
 */
void skeinhash_4way( void *state, const void *input )
{
     uint64_t lane0[8] __attribute__ ((aligned (64)));
     uint64_t lane1[8] __attribute__ ((aligned (64)));
     uint64_t lane2[8] __attribute__ ((aligned (64)));
     uint64_t lane3[8] __attribute__ ((aligned (64)));
     uint64_t interleaved[8*4] __attribute__ ((aligned (64)));
     uint64_t *const lanes[4] = { lane0, lane1, lane2, lane3 };
     char *out = (char*)state;
     skein512_4way_context skein;
     SHA256_CTX sha;

     skein512_4way_init( &skein );
     skein512_4way( &skein, input, 80 );
     skein512_4way_close( &skein, interleaved );

     m256_deinterleave_4x64( lane0, lane1, lane2, lane3, interleaved, 512 );

     /* SHA-256 each lane in place; the digest overwrites the lane's start. */
     for ( int i = 0; i < 4; i++ )
     {
        unsigned char *bytes = (unsigned char*)lanes[i];

        SHA256_Init( &sha );
        SHA256_Update( &sha, bytes, 64 );
        SHA256_Final( bytes, &sha );
     }

     for ( int i = 0; i < 4; i++ )
        memcpy( out + 32*i, (char*)lanes[i], 32 );
}
/*
 * Scan nonces for Skein, four at a time.
 *
 * thr_id:      index into work_restart[] used to detect a restart request.
 * work:        work item; data[19] is the starting nonce and is updated to
 *              the base nonce of the last batch hashed.  work->nfound[] and
 *              work->nonces[] report which of the four lanes hit and with
 *              which nonce.
 * max_nonce:   scanning stops once the next base nonce reaches this value.
 * hashes_done: receives the number of nonces hashed by this call.
 *
 * Returns the number of lanes whose hash met the target in the final batch
 * (0 if the range was exhausted or a restart was requested).
 */
int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done )
{
    uint32_t hash[4*8] __attribute__ ((aligned (64)));
    uint32_t vdata[20*4] __attribute__ ((aligned (64)));
    uint32_t endiandata[20] __attribute__ ((aligned (64)));
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    uint64_t *edata = (uint64_t*)endiandata;
    const uint32_t Htarg = ptarget[7];
    const uint32_t first_nonce = pdata[19];
    uint32_t n = first_nonce;
    // hash is returned deinterleaved
    uint32_t *nonces = work->nonces;
    bool *found = work->nfound;
    int num_found = 0;
    /* 32-bit positions of the nonce in the interleaved buffer, one per lane
     * (9*8 + 1, then every second word).
     * NOTE(review): relies on the layout produced by m256_interleave_4x64. */
    uint32_t *const noncep[4] = { vdata + 73, vdata + 75,
                                  vdata + 77, vdata + 79 };

    // data is 80 bytes, 20 u32 or 10 u64.
    swab32_array( endiandata, pdata, 20 );

    /* The same header goes into all four lanes; only the nonce differs. */
    m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );

    do
    {
       found[0] = found[1] = found[2] = found[3] = false;
       for ( int lane = 0; lane < 4; lane++ )
          be32enc( noncep[lane], n + lane );

       skeinhash_4way( hash, vdata );

       // always put nonce0 in work data for compatibility with
       // non vectored algos.
       /* Stored for every batch, not only on a lane-0 hit, so the work
        * nonce advances even when nothing is found. */
       pdata[19] = n;

       for ( int lane = 0; lane < 4; lane++ )
       {
          uint32_t *lane_hash = hash + 8*lane;

          /* "<=": a hash whose top word equals the target's can still pass
           * fulltest(), so it must not be filtered out here. */
          if ( lane_hash[7] <= Htarg && fulltest( lane_hash, ptarget ) )
          {
             found[lane] = true;
             num_found++;
             nonces[lane] = n + lane;
          }
       }
       n += 4;
    } while ( (num_found == 0) && (n < max_nonce)
              && !work_restart[thr_id].restart );

    /* n was already advanced past the last batch, so no +1 here. */
    *hashes_done = n - first_nonce;
    return num_found;
}
#endif

21
algo/skein/skein-gate.c Normal file
View File

@@ -0,0 +1,21 @@
#include "skein-gate.h"
#include "sph_skein.h"
#include "skein-hash-4way.h"
/* Returns the constant 0x7ffff; register_skein_algo installs this function
 * as the gate's get_max64 callback. */
int64_t skein_get_max64( void ) { return 0x7ffffLL; }
/*
 * Install the Skein callbacks in the algorithm gate.
 *
 * With SKEIN_4WAY defined the 4-way scanhash/hash pair is installed,
 * otherwise the scalar pair.  get_max64 is set in both cases.
 * Always returns true.
 */
bool register_skein_algo( algo_gate_t* gate )
{
#if defined (SKEIN_4WAY)
    gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
    gate->scanhash      = (void*)&scanhash_skein_4way;
    gate->hash          = (void*)&skeinhash_4way;
#else
    gate->optimizations = SSE2_OPT | SHA_OPT;
    gate->scanhash      = (void*)&scanhash_skein;
    gate->hash          = (void*)&skeinhash;
#endif
    gate->get_max64     = (void*)&skein_get_max64;
    return true;
}

23
algo/skein/skein-gate.h Normal file
View File

@@ -0,0 +1,23 @@
#ifndef __SKEIN_GATE_H__
#define __SKEIN_GATE_H__
#include <stdint.h>
#include "algo-gate-api.h"
/* The 4-way Skein variant is selected only when both FOUR_WAY and __AVX2__
 * are defined. */
#if defined(FOUR_WAY) && defined(__AVX2__)
#define SKEIN_4WAY
#endif
#if defined(SKEIN_4WAY)
/* 4-way variant: hashes four interleaved headers per call. */
void skeinhash_4way( void *output, const void *input );
int scanhash_skein_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
/* The scalar variant is declared regardless of SKEIN_4WAY. */
void skeinhash( void *output, const void *input );
int scanhash_skein( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif

View File

@@ -0,0 +1,598 @@
/* $Id: skein.c 254 2011-06-07 19:38:58Z tp $ */
/*
* Skein implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#if defined (__AVX2__)
#include <stddef.h>
#include <string.h>
#include "skein-hash-4way.h"
#ifdef __cplusplus
extern "C"{
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
/*
* M9_ ## s ## _ ## i evaluates to s+i mod 9 (0 <= s <= 18, 0 <= i <= 7).
*/
#define M9_0_0 0
#define M9_0_1 1
#define M9_0_2 2
#define M9_0_3 3
#define M9_0_4 4
#define M9_0_5 5
#define M9_0_6 6
#define M9_0_7 7
#define M9_1_0 1
#define M9_1_1 2
#define M9_1_2 3
#define M9_1_3 4
#define M9_1_4 5
#define M9_1_5 6
#define M9_1_6 7
#define M9_1_7 8
#define M9_2_0 2
#define M9_2_1 3
#define M9_2_2 4
#define M9_2_3 5
#define M9_2_4 6
#define M9_2_5 7
#define M9_2_6 8
#define M9_2_7 0
#define M9_3_0 3
#define M9_3_1 4
#define M9_3_2 5
#define M9_3_3 6
#define M9_3_4 7
#define M9_3_5 8
#define M9_3_6 0
#define M9_3_7 1
#define M9_4_0 4
#define M9_4_1 5
#define M9_4_2 6
#define M9_4_3 7
#define M9_4_4 8
#define M9_4_5 0
#define M9_4_6 1
#define M9_4_7 2
#define M9_5_0 5
#define M9_5_1 6
#define M9_5_2 7
#define M9_5_3 8
#define M9_5_4 0
#define M9_5_5 1
#define M9_5_6 2
#define M9_5_7 3
#define M9_6_0 6
#define M9_6_1 7
#define M9_6_2 8
#define M9_6_3 0
#define M9_6_4 1
#define M9_6_5 2
#define M9_6_6 3
#define M9_6_7 4
#define M9_7_0 7
#define M9_7_1 8
#define M9_7_2 0
#define M9_7_3 1
#define M9_7_4 2
#define M9_7_5 3
#define M9_7_6 4
#define M9_7_7 5
#define M9_8_0 8
#define M9_8_1 0
#define M9_8_2 1
#define M9_8_3 2
#define M9_8_4 3
#define M9_8_5 4
#define M9_8_6 5
#define M9_8_7 6
#define M9_9_0 0
#define M9_9_1 1
#define M9_9_2 2
#define M9_9_3 3
#define M9_9_4 4
#define M9_9_5 5
#define M9_9_6 6
#define M9_9_7 7
#define M9_10_0 1
#define M9_10_1 2
#define M9_10_2 3
#define M9_10_3 4
#define M9_10_4 5
#define M9_10_5 6
#define M9_10_6 7
#define M9_10_7 8
#define M9_11_0 2
#define M9_11_1 3
#define M9_11_2 4
#define M9_11_3 5
#define M9_11_4 6
#define M9_11_5 7
#define M9_11_6 8
#define M9_11_7 0
#define M9_12_0 3
#define M9_12_1 4
#define M9_12_2 5
#define M9_12_3 6
#define M9_12_4 7
#define M9_12_5 8
#define M9_12_6 0
#define M9_12_7 1
#define M9_13_0 4
#define M9_13_1 5
#define M9_13_2 6
#define M9_13_3 7
#define M9_13_4 8
#define M9_13_5 0
#define M9_13_6 1
#define M9_13_7 2
#define M9_14_0 5
#define M9_14_1 6
#define M9_14_2 7
#define M9_14_3 8
#define M9_14_4 0
#define M9_14_5 1
#define M9_14_6 2
#define M9_14_7 3
#define M9_15_0 6
#define M9_15_1 7
#define M9_15_2 8
#define M9_15_3 0
#define M9_15_4 1
#define M9_15_5 2
#define M9_15_6 3
#define M9_15_7 4
#define M9_16_0 7
#define M9_16_1 8
#define M9_16_2 0
#define M9_16_3 1
#define M9_16_4 2
#define M9_16_5 3
#define M9_16_6 4
#define M9_16_7 5
#define M9_17_0 8
#define M9_17_1 0
#define M9_17_2 1
#define M9_17_3 2
#define M9_17_4 3
#define M9_17_5 4
#define M9_17_6 5
#define M9_17_7 6
#define M9_18_0 0
#define M9_18_1 1
#define M9_18_2 2
#define M9_18_3 3
#define M9_18_4 4
#define M9_18_5 5
#define M9_18_6 6
#define M9_18_7 7
/*
* M3_ ## s ## _ ## i evaluates to s+i mod 3 (0 <= s <= 18, 0 <= i <= 1).
*/
#define M3_0_0 0
#define M3_0_1 1
#define M3_1_0 1
#define M3_1_1 2
#define M3_2_0 2
#define M3_2_1 0
#define M3_3_0 0
#define M3_3_1 1
#define M3_4_0 1
#define M3_4_1 2
#define M3_5_0 2
#define M3_5_1 0
#define M3_6_0 0
#define M3_6_1 1
#define M3_7_0 1
#define M3_7_1 2
#define M3_8_0 2
#define M3_8_1 0
#define M3_9_0 0
#define M3_9_1 1
#define M3_10_0 1
#define M3_10_1 2
#define M3_11_0 2
#define M3_11_1 0
#define M3_12_0 0
#define M3_12_1 1
#define M3_13_0 1
#define M3_13_1 2
#define M3_14_0 2
#define M3_14_1 0
#define M3_15_0 0
#define M3_15_1 1
#define M3_16_0 1
#define M3_16_1 2
#define M3_17_0 2
#define M3_17_1 0
#define M3_18_0 0
#define M3_18_1 1
/* Token pasting with one extra level of expansion, so that macro arguments
 * are expanded before being concatenated. */
#define XCAT(x, y) XCAT_(x, y)
#define XCAT_(x, y) x ## y
/* SKBI(k, s, i) names key word (s+i) mod 9 with prefix k, e.g. SKBI(h, 8, 1)
 * expands to h0.  SKBT(t, s, v) names tweak word (s+v) mod 3 with prefix t.
 * The indices are resolved at preprocessing time through the M9_ and M3_
 * tables. */
#define SKBI(k, s, i) XCAT(k, XCAT(XCAT(XCAT(M9_, s), _), i))
#define SKBT(t, s, v) XCAT(t, XCAT(XCAT(XCAT(M3_, s), _), v))
// AVX2 all scalar vars are now vectors representing 4 nonces in parallel
/* Derive the extra key and tweak words: k8 is the XOR of k0..k7 and the
 * constant 0x1BD11BDAA9FC1A22 (the same constant in every 64-bit element),
 * and t2 = t0 ^ t1.  The k words are vectors, the t words are scalars. */
#define TFBIG_KINIT_4WAY( k0, k1, k2, k3, k4, k5, k6, k7, k8, t0, t1, t2 ) \
do { \
k8 = _mm256_xor_si256( _mm256_xor_si256( \
_mm256_xor_si256( _mm256_xor_si256( k0, k1 ), \
_mm256_xor_si256( k2, k3 ) ), \
_mm256_xor_si256( _mm256_xor_si256( k4, k5 ), \
_mm256_xor_si256( k6, k7 ) ) ), \
_mm256_set_epi64x( SPH_C64(0x1BD11BDAA9FC1A22), \
SPH_C64(0x1BD11BDAA9FC1A22), \
SPH_C64(0x1BD11BDAA9FC1A22), \
SPH_C64(0x1BD11BDAA9FC1A22) ) ); \
t2 = t0 ^ t1; \
} while (0)
/* Add subkey number s to the state words w0..w7 (64-bit adds per element).
 * Word i receives key word (s+i) mod 9.  In addition w5 receives tweak word
 * s mod 3, w6 receives tweak word (s+1) mod 3 and w7 receives s itself; the
 * scalar tweak words and s are replicated into all four elements. */
#define TFBIG_ADDKEY_4WAY(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s) \
do { \
w0 = _mm256_add_epi64( w0, SKBI(k,s,0) ); \
w1 = _mm256_add_epi64( w1, SKBI(k,s,1) ); \
w2 = _mm256_add_epi64( w2, SKBI(k,s,2) ); \
w3 = _mm256_add_epi64( w3, SKBI(k,s,3) ); \
w4 = _mm256_add_epi64( w4, SKBI(k,s,4) ); \
w5 = _mm256_add_epi64( w5, _mm256_add_epi64( SKBI(k,s,5), \
_mm256_set_epi64x( SKBT(t,s,0), SKBT(t,s,0), \
SKBT(t,s,0), SKBT(t,s,0) ) ) ); \
w6 = _mm256_add_epi64( w6, _mm256_add_epi64( SKBI(k,s,6), \
_mm256_set_epi64x( SKBT(t,s,1), SKBT(t,s,1), \
SKBT(t,s,1), SKBT(t,s,1) ) ) ); \
w7 = _mm256_add_epi64( w7, _mm256_add_epi64( SKBI(k,s,7), \
_mm256_set_epi64x( s, s, s, s ) ) ); \
} while (0)
/* One mix step on a pair of state words: x0 += x1, then
 * x1 = rotl(x1, rc) ^ x0.
 * NOTE(review): mm256_rotl_64 is presumably a per-element 64-bit rotate
 * left; it is defined outside this file. */
#define TFBIG_MIX_4WAY(x0, x1, rc) \
do { \
x0 = _mm256_add_epi64( x0, x1 ); \
x1 = _mm256_xor_si256( mm256_rotl_64( x1, rc ), x0 ); \
} while (0)
// typeless
/* One round: mixes the four pairs (w0,w1), (w2,w3), (w4,w5), (w6,w7) with
 * rotation counts rc0..rc3 respectively. */
#define TFBIG_MIX8(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) do { \
TFBIG_MIX_4WAY(w0, w1, rc0); \
TFBIG_MIX_4WAY(w2, w3, rc1); \
TFBIG_MIX_4WAY(w4, w5, rc2); \
TFBIG_MIX_4WAY(w6, w7, rc3); \
} while (0)
/* Four rounds preceded by the addition of subkey s.  The word permutation
 * between rounds is expressed by the order in which p0..p7 are passed to
 * TFBIG_MIX8.  TFBIG_4e and TFBIG_4o differ only in their rotation
 * constants; UBI_BIG_4WAY uses 4e for even s and 4o for odd s.  Both expect
 * p0..p7, the key words h0..h8 and the tweak words t0..t2 to be in scope. */
#define TFBIG_4e(s) do { \
TFBIG_ADDKEY_4WAY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \
TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 46, 36, 19, 37); \
TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 33, 27, 14, 42); \
TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 17, 49, 36, 39); \
TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3, 44, 9, 54, 56); \
} while (0)
#define TFBIG_4o(s) do { \
TFBIG_ADDKEY_4WAY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \
TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 39, 30, 34, 24); \
TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 13, 50, 10, 17); \
TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 25, 29, 39, 43); \
TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3, 8, 35, 56, 22); \
} while (0)
// scale buf offset by 4
/*
 * Process one buffered block for all four lanes.
 *
 * Expects buf (eight __m256i message words), h0..h7 and bcount to be in
 * scope.  The tweak is built from bcount, extra and etype:
 *    t0 = (bcount << 6) + extra
 *    t1 = (bcount >> 58) + (etype << 55)
 * The message words are copied to p0..p7, run through 18 groups of four
 * rounds (subkeys 0..17) plus the final subkey 18, and the new chaining
 * value is h[i] = m[i] ^ p[i].
 * The message words are read from buf directly, without a byte-order
 * conversion (see the disabled dec64le variant inside the macro).
 */
#define UBI_BIG_4WAY(etype, extra) \
do { \
sph_u64 t0, t1, t2; \
__m256i h8; \
/* can LE be assumed? \
dec64le does nothing when SPH_LITTLE endian is set, as it is. \
__m256i m0 = _mm256_dec64le( buf ); \
__m256i m1 = _mm256_dec64le( buf + 8*4 ); \
__m256i m2 = _mm256_dec64le( buf + 16*4 ); \
__m256i m3 = _mm256_dec64le( buf + 24*4 ); \
__m256i m4 = _mm256_dec64le( buf + 32*4 ); \
__m256i m5 = _mm256_dec64le( buf + 40*4 ); \
__m256i m6 = _mm256_dec64le( buf + 48*4 ); \
__m256i m7 = _mm256_dec64le( buf + 56*4 ); \
*/ \
__m256i m0 = buf[0]; \
__m256i m1 = buf[1]; \
__m256i m2 = buf[2]; \
__m256i m3 = buf[3]; \
__m256i m4 = buf[4]; \
__m256i m5 = buf[5]; \
__m256i m6 = buf[6]; \
__m256i m7 = buf[7]; \
\
__m256i p0 = m0; \
__m256i p1 = m1; \
__m256i p2 = m2; \
__m256i p3 = m3; \
__m256i p4 = m4; \
__m256i p5 = m5; \
__m256i p6 = m6; \
__m256i p7 = m7; \
t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
TFBIG_KINIT_4WAY(h0, h1, h2, h3, h4, h5, h6, h7, h8, t0, t1, t2); \
TFBIG_4e(0); \
TFBIG_4o(1); \
TFBIG_4e(2); \
TFBIG_4o(3); \
TFBIG_4e(4); \
TFBIG_4o(5); \
TFBIG_4e(6); \
TFBIG_4o(7); \
TFBIG_4e(8); \
TFBIG_4o(9); \
TFBIG_4e(10); \
TFBIG_4o(11); \
TFBIG_4e(12); \
TFBIG_4o(13); \
TFBIG_4e(14); \
TFBIG_4o(15); \
TFBIG_4e(16); \
TFBIG_4o(17); \
TFBIG_ADDKEY_4WAY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, 18); \
h0 = _mm256_xor_si256( m0, p0 );\
h1 = _mm256_xor_si256( m1, p1 );\
h2 = _mm256_xor_si256( m2, p2 );\
h3 = _mm256_xor_si256( m3, p3 );\
h4 = _mm256_xor_si256( m4, p4 );\
h5 = _mm256_xor_si256( m5, p5 );\
h6 = _mm256_xor_si256( m6, p6 );\
h7 = _mm256_xor_si256( m7, p7 );\
} while (0)
/* Declares the local working copy of the hash state used by
 * READ_STATE_BIG, WRITE_STATE_BIG and UBI_BIG_4WAY: eight chaining vectors
 * and the scalar block counter. */
#define DECL_STATE_BIG_4WAY \
__m256i h0, h1, h2, h3, h4, h5, h6, h7; \
sph_u64 bcount;
/* Load the chaining values and block counter from the context pointed to by
 * sc into the locals h0..h7 and bcount.  The argument is parenthesised at
 * every use so that any pointer expression may be passed. */
#define READ_STATE_BIG(sc) do { \
   h0 = (sc)->h0; \
   h1 = (sc)->h1; \
   h2 = (sc)->h2; \
   h3 = (sc)->h3; \
   h4 = (sc)->h4; \
   h5 = (sc)->h5; \
   h6 = (sc)->h6; \
   h7 = (sc)->h7; \
   bcount = (sc)->bcount; \
} while (0)
/* Store the locals h0..h7 and bcount back into the context pointed to by
 * sc.  The argument is parenthesised at every use so that any pointer
 * expression may be passed. */
#define WRITE_STATE_BIG(sc) do { \
   (sc)->h0 = h0; \
   (sc)->h1 = h1; \
   (sc)->h2 = h2; \
   (sc)->h3 = h3; \
   (sc)->h4 = h4; \
   (sc)->h5 = h5; \
   (sc)->h6 = h6; \
   (sc)->h7 = h7; \
   (sc)->bcount = bcount; \
} while (0)
/*
 * Reset a 4-way Skein context: each chaining word is loaded with the
 * matching IV word replicated into all four 64-bit elements, and the block
 * counter and buffer fill level are cleared.
 */
static void
skein_big_init_4way( skein512_4way_context *sc, const sph_u64 *iv )
{
   /* Same IV for every lane. */
   sc->h0 = _mm256_set1_epi64x( iv[0] );
   sc->h1 = _mm256_set1_epi64x( iv[1] );
   sc->h2 = _mm256_set1_epi64x( iv[2] );
   sc->h3 = _mm256_set1_epi64x( iv[3] );
   sc->h4 = _mm256_set1_epi64x( iv[4] );
   sc->h5 = _mm256_set1_epi64x( iv[5] );
   sc->h6 = _mm256_set1_epi64x( iv[6] );
   sc->h7 = _mm256_set1_epi64x( iv[7] );

   sc->ptr    = 0;
   sc->bcount = 0;
}
/*
 * Absorb input into a 4-way context.
 *
 * sc:   context being updated.
 * data: input, accessed as an array of __m256i.
 * len:  amount of input.  ptr, len and buf_size share one unit in which
 *       8 units correspond to one __m256i element (see the >>3
 *       conversions) -- presumably bytes of a single lane; TODO confirm.
 *
 * Input that fits in the free part of sc->buf is only buffered.  Otherwise
 * the buffer is filled and, each time it is full while more input remains,
 * processed with UBI_BIG_4WAY.  A full buffer is left unprocessed when the
 * input ends exactly on a block boundary.
 *
 * NOTE(review): element counts are computed with >>3 while ptr advances by
 * the unshifted amount, so a len that is not a multiple of 8 looks like it
 * would be partly dropped -- confirm callers only pass multiples of 8.
 */
static void
skein_big_core_4way( skein512_4way_context *sc, const void *data,
size_t len )
{
__m256i *vdata = (__m256i*)data;
__m256i *buf;
size_t ptr;
unsigned first;
DECL_STATE_BIG_4WAY
buf = sc->buf;
ptr = sc->ptr;
const int buf_size = 64; // capacity of buf in ptr/len units: 64 >> 3 == 8 __m256i elements
// Fast path: everything fits in the buffer, no block is processed.
if ( len <= buf_size - ptr )
{
memcpy_m256i( buf + (ptr>>3), vdata, len>>3 );
sc->ptr = ptr + len;
return;
}
READ_STATE_BIG( sc );
// 128 is added to the UBI type argument only while no block has been
// processed yet (bcount == 0) -- presumably the "first block" flag.
first = ( bcount == 0 ) << 7;
do {
size_t clen;
// Buffer full and more input pending: process it, then start refilling.
if ( ptr == buf_size )
{
bcount ++;
UBI_BIG_4WAY( 96 + first, 0 );
first = 0;
ptr = 0;
}
// Copy as much as fits in the remaining buffer space.
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_m256i( buf + (ptr>>3), vdata, clen>>3 );
ptr += clen;
vdata += (clen>>3);
len -= clen;
} while ( len > 0 );
WRITE_STATE_BIG( sc );
sc->ptr = ptr;
}
/*
 * Finish a 4-way hash and write the digest.
 *
 * sc:      context holding the buffered tail of the message.
 * ub, n:   not referenced by this function.
 * dst:     receives out_len >> 3 __m256i elements taken from h0..h7.
 * out_len: digest length, in the same unit as ptr (8 units per __m256i
 *          element) -- presumably bytes per lane; TODO confirm.
 *
 * The working state is read from sc but never written back (there is no
 * WRITE_STATE_BIG); only sc->buf is overwritten.
 */
static void
skein_big_close_4way( skein512_4way_context *sc, unsigned ub, unsigned n,
void *dst, size_t out_len )
{
__m256i *buf;
size_t ptr;
unsigned et;
DECL_STATE_BIG_4WAY
buf = sc->buf;
ptr = sc->ptr;
const int buf_size = 64; // capacity of buf in ptr units: 64 >> 3 == 8 __m256i elements
/*
* At that point, if ptr == 0, then the message was empty;
* otherwise, there is between 1 and 64 bytes (inclusive) which
* are yet to be processed. Either way, we complete the buffer
* to a full block with zeros (the Skein specification mandates
* that an empty message is padded so that there is at least
* one block to process).
*
* Once this block has been processed, we do it again, with
* a block full of zeros, for the output (that block contains
* the encoding of "0", over 8 bytes, then padded with zeros).
*/
READ_STATE_BIG(sc);
// Zero-pad the unused tail of the buffer.
memset_zero_m256i( buf + (ptr>>3), (buf_size - ptr) >> 3 );
// 128 is added to the type argument when no block was processed before.
et = 352 + ((bcount == 0) << 7);
UBI_BIG_4WAY( et, ptr );
// Second pass over an all-zero block with the block counter reset.
memset_zero_m256i( buf, buf_size >> 3 );
bcount = 0;
UBI_BIG_4WAY( 510, 8 );
// Stage the resulting chaining words in buf, then copy the requested
// number of elements to the caller.
buf[0] = h0;
buf[1] = h1;
buf[2] = h2;
buf[3] = h3;
buf[4] = h4;
buf[5] = h5;
buf[6] = h6;
buf[7] = h7;
memcpy_m256i( dst, buf, out_len >> 3 );
}
/* Initial chaining words installed by skein256_4way_init(). */
static const sph_u64 IV256[] = {
   SPH_C64(0xCCD044A12FDB3E13),
   SPH_C64(0xE83590301A79A9EB),
   SPH_C64(0x55AEA0614F816E6F),
   SPH_C64(0x2A2767A4AE9B94DB),
   SPH_C64(0xEC06025E74DD7683),
   SPH_C64(0xE7A436CDC4746251),
   SPH_C64(0xC36FBAF9393AD185),
   SPH_C64(0x3EEDBA1833EDFC13)
};
/* Initial chaining words installed by skein512_4way_init(). */
static const sph_u64 IV512[] = {
   SPH_C64(0x4903ADFF749C51CE),
   SPH_C64(0x0D95DE399746DF03),
   SPH_C64(0x8FD1934127C79BCE),
   SPH_C64(0x9A255629FF352CB1),
   SPH_C64(0x5DB62599DF6CA7B0),
   SPH_C64(0xEABE394CA9D5C3F4),
   SPH_C64(0x991112C71A75B523),
   SPH_C64(0xAE18A40B660FCC33)
};
/*
 * Start a new 4-way hash with the IV256 initial values.
 * cc is handed to skein_big_init_4way(), which treats it as a
 * skein512_4way_context.
 */
void
skein256_4way_init(void *cc)
{
skein_big_init_4way(cc, IV256);
}
/*
 * Absorb len units of 4-way interleaved input; forwards unchanged to
 * skein_big_core_4way().
 */
void
skein256_4way(void *cc, const void *data, size_t len)
{
skein_big_core_4way(cc, data, len);
}
/*
 * Finish the hash and write the digest to dst, requesting an output
 * length of 32 from skein_big_close_4way() (presumably 32 bytes per
 * lane, still interleaved -- TODO confirm).
 */
void
skein256_4way_close(void *cc, void *dst)
{
skein_big_close_4way(cc, 0, 0, dst, 32);
}
/*
 * Start a new 4-way hash with the IV512 initial values.
 * cc is handed to skein_big_init_4way(), which treats it as a
 * skein512_4way_context.
 */
void
skein512_4way_init(void *cc)
{
skein_big_init_4way(cc, IV512);
}
/*
 * Absorb len units of 4-way interleaved input; forwards unchanged to
 * skein_big_core_4way().
 */
void
skein512_4way(void *cc, const void *data, size_t len)
{
skein_big_core_4way(cc, data, len);
}
/*
 * Finish the hash and write the digest to dst, requesting an output
 * length of 64 from skein_big_close_4way() (presumably 64 bytes per
 * lane, still interleaved -- TODO confirm).
 */
void
skein512_4way_close(void *cc, void *dst)
{
skein_big_close_4way(cc, 0, 0, dst, 64);
}
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,93 @@
/* $Id: sph_skein.h 253 2011-06-07 18:33:10Z tp $ */
/**
* Skein interface. The Skein specification defines three main
* functions, called Skein-256, Skein-512 and Skein-1024, which can be
* further parameterized with an output length. For the SHA-3
* competition, Skein-512 is used for output sizes of 224, 256, 384 and
* 512 bits; this is what this code implements. Thus, we hereafter call
* Skein-224, Skein-256, Skein-384 and Skein-512 what the Skein
* specification defines as Skein-512-224, Skein-512-256, Skein-512-384
* and Skein-512-512, respectively.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file skein-hash-4way.h (4-way AVX2 variant derived from sph_skein.h)
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef __SKEIN_HASH_4WAY_H__
#define __SKEIN_HASH_4WAY_H__
#ifdef __cplusplus
extern "C"{
#endif
#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
// Output size in bits
#define SPH_SIZE_skein256 256
#define SPH_SIZE_skein512 512
#ifdef __AVX2__
// State of one 4-way Skein computation.  Every __m256i holds the same
// 64-bit word position for four independent lanes.
typedef struct {
   // Input staged until a full block (8 vectors) can be processed.
   __m256i buf[8] __attribute__ ((aligned (32)));
   // Chaining words, loaded from the IV by the init functions.
   __m256i h0;
   __m256i h1;
   __m256i h2;
   __m256i h3;
   __m256i h4;
   __m256i h5;
   __m256i h6;
   __m256i h7;
   // Fill level of buf (8 units per __m256i element).
   size_t ptr;
   // Number of blocks processed so far.
   sph_u64 bcount;
} skein512_4way_context;
// Reset the context cc (a skein512_4way_context) to the Skein-512 IV.
void skein512_4way_init(void *cc);
// Absorb len units of 4-way interleaved input from data
// (presumably len is bytes per lane -- TODO confirm).
void skein512_4way(void *cc, const void *data, size_t len);
// Finish the hash and write the interleaved digest to dst.
void skein512_4way_close(void *cc, void *dst);
// Not provided by the 4-way implementation:
//void sph_skein512_addbits_and_close(
// void *cc, unsigned ub, unsigned n, void *dst);
#endif
#ifdef __AVX__
// Context for the skein256_4way_* functions.
//
// The layout must match skein512_4way_context field for field: the
// skein256_4way_* entry points pass their context to the same internal
// routines as the skein512 variants, and those routines read and write
// __m256i members.  Declaring the members as __m128i would make a
// caller-allocated context too small and cause it to be overrun.
typedef struct {
   // Input staged until a full block (8 vectors) can be processed.
   __m256i buf[8] __attribute__ ((aligned (32)));
   // Chaining words, one 64-bit word per lane in each vector.
   __m256i h0, h1, h2, h3, h4, h5, h6, h7;
   // Fill level of buf (8 units per __m256i element).
   size_t ptr;
   // Number of blocks processed so far.
   sph_u64 bcount;
} skein256_4way_context;
// Reset the context cc to the IV used for the 256-bit output variant.
void skein256_4way_init(void *cc);
// Absorb len units of 4-way interleaved input from data
// (presumably len is bytes per lane -- TODO confirm).
void skein256_4way(void *cc, const void *data, size_t len);
// Finish the hash and write the interleaved digest to dst.
void skein256_4way_close(void *cc, void *dst);
// Not provided by the 4-way implementation:
//void sph_skein256_addbits_and_close(
// void *cc, unsigned ub, unsigned n, void *dst);
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -1,53 +1,32 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>
#include <stdint.h>
#include "sph_skein.h"
#if defined __SHA__
#include <openssl/sha.h>
#else
#include "algo/sha/sph_sha2.h"
#endif
// Bundle of the two hashing contexts used by the skein algorithm:
// Skein-512 followed by SHA-256.  The SHA-256 context type depends on
// whether __SHA__ is defined (OpenSSL) or not (sph).
typedef struct {
sph_skein512_context skein;
#if defined __SHA__
SHA256_CTX sha256;
#else
sph_sha256_context sha256;
#endif
} skein_ctx_holder;
// File-scope instance, initialised by init_skein_ctx().
skein_ctx_holder skein_ctx;
// Bring both members of the file-scope skein_ctx into their freshly
// initialised state; the SHA-256 flavour follows __SHA__.
void init_skein_ctx()
{
#if defined __SHA__
   SHA256_Init( &skein_ctx.sha256 );
#else
   sph_sha256_init( &skein_ctx.sha256 );
#endif
   sph_skein512_init( &skein_ctx.skein );
}
void skeinhash(void *state, const void *input)
{
skein_ctx_holder ctx __attribute__ ((aligned (64)));
memcpy( &ctx, &skein_ctx, sizeof(skein_ctx) );
uint32_t hash[16] __attribute__ ((aligned (64)));
sph_skein512( &ctx.skein, input, 80 );
sph_skein512_close( &ctx.skein, hash );
#if defined __SHA__
SHA256_Update( &ctx.sha256, hash, 64 );
SHA256_Final( (unsigned char*) hash, &ctx.sha256 );
sph_skein512_context ctx_skein;
#ifndef USE_SPH_SHA
SHA256_CTX ctx_sha256;
#else
sph_sha256( &ctx.sha256, hash, 64 );
sph_sha256_close( &ctx.sha256, hash );
sph_sha256_context ctx_sha256;
#endif
sph_skein512_init( &ctx_skein );
sph_skein512( &ctx_skein, input, 80 );
sph_skein512_close( &ctx_skein, hash );
#ifndef USE_SPH_SHA
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, (unsigned char*)hash, 64 );
SHA256_Final( (unsigned char*) hash, &ctx_sha256 );
#else
sph_sha256_init( &ctx_sha256 );
sph_sha256( &ctx_sha256, hash, 64 );
sph_sha256_close( &ctx_sha256, hash );
#endif
memcpy(state, hash, 32);
@@ -84,15 +63,3 @@ int scanhash_skein(int thr_id, struct work *work,
return 0;
}
// Value installed as the gate's get_max64 hook for skein: always 0x7ffff.
int64_t skein_get_max64()
{
   const int64_t max64 = 0x7ffffLL;
   return max64;
}
// Install the skein entry points into the algorithm gate.
//
// Initialises the shared skein context first, then sets the optimisation
// flags and the scanhash / hash / get_max64 hooks.  Always returns true.
bool register_skein_algo( algo_gate_t* gate )
{
   init_skein_ctx();
   gate->optimizations = SSE2_OPT | SHA_OPT;
   gate->scanhash = (void*)&scanhash_skein;
   gate->hash = (void*)&skeinhash;
   gate->get_max64 = (void*)&skein_get_max64;
   return true;
}

93
algo/skein/skein2-4way.c Normal file
View File

@@ -0,0 +1,93 @@
#include "skein2-gate.h"
#include <string.h>
#include <stdint.h>
#include "skein-hash-4way.h"
#if defined(SKEIN2_4WAY)
// Skein2 for four lanes at once: Skein-512 over the 80-unit interleaved
// input, then Skein-512 again over the 64-unit interleaved result.
//
// output: receives the four results de-interleaved, lane k starting at
//         64-bit word 4*k.
// input:  4-way interleaved block headers.
void skein2hash_4way( void *output, const void *input )
{
   uint64_t vhash[8*4] __attribute__ ((aligned (64)));
   skein512_4way_context sk;
   uint64_t *lane0 = (uint64_t*)output;

   // First pass: hash the interleaved headers.
   skein512_4way_init( &sk );
   skein512_4way( &sk, input, 80 );
   skein512_4way_close( &sk, vhash );

   // Second pass: hash the interleaved digests in place.
   skein512_4way_init( &sk );
   skein512_4way( &sk, vhash, 64 );
   skein512_4way_close( &sk, vhash );

   // Split the interleaved result into one contiguous digest per lane.
   m256_deinterleave_4x64( lane0, lane0 + 4, lane0 + 8, lane0 + 12,
                           vhash, 256 );
}
// Scan nonces four at a time with the 4-way Skein2 hash.
//
// thr_id:      index into work_restart[] for this thread.
// work:        supplies data (block header) and target; receives the
//              per-lane results in work->nonces[] / work->nfound[].
// max_nonce:   scanning stops once the next base nonce reaches this value.
// hashes_done: receives n - first_nonce + 1, n being the next unscanned
//              base nonce.
//
// Returns the number of lanes (0..4) whose hash met the target in the last
// group scanned.
int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done )
{
   uint32_t hash[4*8] __attribute__ ((aligned (64)));
   uint32_t vdata[20*4] __attribute__ ((aligned (64)));
   uint32_t endiandata[20] __attribute__ ((aligned (64)));
   uint64_t *edata = (uint64_t*)endiandata;
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t Htarg = ptarget[7];
   const uint32_t first_nonce = pdata[19];
   uint32_t n = first_nonce;
   // hash is returned deinterleaved
   uint32_t *nonces = work->nonces;
   bool *found = work->nfound;
   int num_found = 0;

   // Byte-swap the header once and replicate it into all four lanes.
   swab32_array( endiandata, pdata, 20 );
   m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );

   // Location of header word 19 (the nonce) in each lane of the
   // interleaved buffer.
   uint32_t *noncep0 = vdata + 73;   // 9*8 + 1
   uint32_t *noncep1 = vdata + 75;
   uint32_t *noncep2 = vdata + 77;
   uint32_t *noncep3 = vdata + 79;

   do
   {
      found[0] = found[1] = found[2] = found[3] = false;
      be32enc( noncep0, n   );
      be32enc( noncep1, n+1 );
      be32enc( noncep2, n+2 );
      be32enc( noncep3, n+3 );

      // vdata is 4-way interleaved, so the 4-way hash must be used; it
      // fills all four 8-word lane results in hash[].
      skein2hash_4way( hash, vdata );

      for ( int lane = 0; lane < 4; lane++ )
      {
         uint32_t *lane_hash = hash + 8*lane;
         if ( lane_hash[7] < Htarg && fulltest( lane_hash, ptarget ) )
         {
            found[lane] = true;
            num_found++;
            nonces[lane] = n + lane;
         }
      }
      n += 4;
   } while ( (num_found == 0) && (n < max_nonce)
             && !work_restart[thr_id].restart );

   *hashes_done = n - first_nonce + 1;
   return num_found;
}
#endif

24
algo/skein/skein2-gate.c Normal file
View File

@@ -0,0 +1,24 @@
#include "skein2-gate.h"
#include <stdint.h>
#include "sph_skein.h"
// Value installed as the gate's get_max64 hook for skein2: always 0x7ffff.
int64_t skein2_get_max64 ()
{
   const int64_t max64 = 0x7ffffLL;
   return max64;
}
// Install the skein2 entry points into the algorithm gate.
//
// The 4-way AVX2 implementation is selected with SKEIN2_4WAY, the switch
// defined by skein2-gate.h, which also guards the 4-way declarations and
// definitions; otherwise the scalar implementation is registered.
// Always returns true.
bool register_skein2_algo( algo_gate_t* gate )
{
#if defined (SKEIN2_4WAY)
   gate->optimizations = SSE2_OPT | AVX2_OPT;
   gate->scanhash = (void*)&scanhash_skein2_4way;
   gate->hash = (void*)&skein2hash_4way;
#else
   gate->optimizations = SSE2_OPT;
   gate->scanhash = (void*)&scanhash_skein2;
   gate->hash = (void*)&skein2hash;
#endif
   gate->get_max64 = (void*)&skein2_get_max64;
   return true;
}

20
algo/skein/skein2-gate.h Normal file
View File

@@ -0,0 +1,20 @@
// Include guard: the tested and the defined macro must be the same name.
#ifndef __SKEIN2_GATE_H__
#define __SKEIN2_GATE_H__
#include "algo-gate-api.h"
#include <stdint.h>
// The 4-way implementation is built only when 4-way support is requested
// (FOUR_WAY) and the compiler targets AVX2.
#if defined(FOUR_WAY) && defined(__AVX2__)
#define SKEIN2_4WAY
#endif
#if defined(SKEIN2_4WAY)
// Hash four interleaved inputs; output receives the four de-interleaved
// results.
void skein2hash_4way( void *output, const void *input );
// Scan nonces four at a time; returns the number of lanes that met the
// target.
int scanhash_skein2_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t* hashes_done );
#endif
// Scalar hash of a single input.
void skein2hash( void *output, const void *input );
// Scalar nonce scan.
int scanhash_skein2( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif

View File

@@ -1,4 +1,3 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>
#include <stdint.h>
@@ -66,16 +65,4 @@ int scanhash_skein2(int thr_id, struct work *work,
return 0;
}
// Value installed as the gate's get_max64 hook for skein2: always 0x7ffff.
int64_t skein2_get_max64 ()
{
   const int64_t max64 = 0x7ffffLL;
   return max64;
}
// Install the scalar skein2 entry points (scanhash, hash, get_max64) into
// the algorithm gate.  Always returns true.
bool register_skein2_algo( algo_gate_t* gate )
{
   gate->scanhash = (void*)&scanhash_skein2;
   gate->hash = (void*)&skein2hash;
   gate->get_max64 = (void*)&skein2_get_max64;
   return true;
}

View File

@@ -39,7 +39,6 @@
extern "C"{
#endif
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SKEIN
#define SPH_SMALL_FOOTPRINT_SKEIN 1
#endif
@@ -883,6 +882,7 @@ skein_big_core(sph_skein_big_context *sc, const void *data, size_t len)
}
READ_STATE_BIG(sc);
first = (bcount == 0) << 7;
do {
size_t clen;

101
algo/skunk.c Normal file
View File

@@ -0,0 +1,101 @@
#include "algo-gate-api.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/gost/sph_gost.h"
#include "algo/skein/sph_skein.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/cubehash/sse2/cubehash_sse2.h"
// One context per stage of the skunk chain, in the order the stages run:
// Skein-512, CubeHash, Fugue-512, GOST-512.
typedef struct {
sph_skein512_context skein;
cubehashParam cube;
sph_fugue512_context fugue;
sph_gost512_context gost;
} skunk_ctx_holder;
// Per-thread template: initialised by skunk_thread_init(), its skein
// member is re-primed by scanhash_skunk(), and skunkhash() copies it for
// every hash.
static __thread skunk_ctx_holder skunk_ctx;
// Compute one skunk hash: Skein-512 -> CubeHash -> Fugue-512 -> GOST-512.
//
// output: receives the first 32 bytes of the final digest.
// input:  80-byte header.  Only bytes 64..79 are hashed here; the Skein
//         state for the first 64 bytes is taken from the per-thread
//         skunk_ctx, which scanhash_skunk() primes before scanning.
void skunkhash( void *output, const void *input )
{
   unsigned char hash[128] __attribute__ ((aligned (64)));
   skunk_ctx_holder ctx __attribute__ ((aligned (64)));
   // Arithmetic on void* is a GNU extension; step through the input as
   // bytes instead.
   const unsigned char *tail = (const unsigned char*)input + 64;

   // Work on a private copy so the per-thread template stays reusable.
   memcpy( &ctx, &skunk_ctx, sizeof(skunk_ctx) );

   sph_skein512( &ctx.skein, tail, 16 );
   sph_skein512_close( &ctx.skein, (void*) hash );

   cubehashUpdateDigest( &ctx.cube, (byte*) hash, (const byte*)hash, 64 );

   sph_fugue512( &ctx.fugue, hash, 64 );
   sph_fugue512_close( &ctx.fugue, hash );

   sph_gost512( &ctx.gost, hash, 64 );
   sph_gost512_close( &ctx.gost, hash );

   memcpy(output, hash, 32);
}
// Scan nonces one at a time with skunkhash().
//
// thr_id:      index into work_restart[] for this thread.
// work:        supplies data (block header) and target.
// max_nonce:   scanning stops once the nonce reaches this value.
// hashes_done: receives the reported number of hashes.
//
// Returns 1 with pdata[19] set to the winning nonce when a hash meets the
// target, 0 otherwise (pdata[19] is then the next nonce to try).
//
// NOTE(review): hashes_done is nonce - first_nonce on success but
// nonce - first_nonce + 1 after the loop, where nonce has already been
// incremented -- the two paths look inconsistent by one; confirm intent.
int scanhash_skunk( int thr_id, struct work *work, uint32_t max_nonce,
                    uint64_t *hashes_done )
{
   uint32_t _ALIGN(64) endiandata[20];
   uint32_t hash[8];
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   volatile uint8_t *restart = &(work_restart[thr_id].restart);
   const uint32_t first_nonce = pdata[19];
   uint32_t nonce = first_nonce;

   if ( opt_benchmark )
      ptarget[7] = 0x0cff;

   // Convert header words 0..18; word 19 (the nonce) is set per iteration.
   int k = 0;
   while ( k < 19 )
   {
      be32enc( &endiandata[k], pdata[k] );
      k++;
   }

   // precalc midstate
   sph_skein512_init( &skunk_ctx.skein );
   sph_skein512( &skunk_ctx.skein, endiandata, 64 );

   const uint32_t Htarg = ptarget[7];

   for (;;)
   {
      be32enc( &endiandata[19], nonce );
      skunkhash( hash, endiandata );

      if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
      {
         pdata[19] = nonce;
         *hashes_done = nonce - first_nonce;
         return 1;
      }

      nonce++;
      if ( nonce >= max_nonce || *restart )
         break;
   }

   pdata[19] = nonce;
   *hashes_done = nonce - first_nonce + 1;
   return 0;
}
// Per-thread setup: initialise every stage of the thread-local skunk_ctx
// template that skunkhash() copies for each hash.  Always returns true.
bool skunk_thread_init()
{
sph_skein512_init( &skunk_ctx.skein );
// CubeHash parameters 512, 16, 32 -- presumably output bits, rounds and
// block bytes; TODO confirm against cubehashInit().
cubehashInit( &skunk_ctx.cube, 512, 16, 32 );
sph_fugue512_init( &skunk_ctx.fugue );
sph_gost512_init( &skunk_ctx.gost );
return true;
}
// Install the skunk entry points into the algorithm gate: the hash and
// scan functions, the per-thread initialiser and the optimisation flags.
// Always returns true.
bool register_skunk_algo( algo_gate_t* gate )
{
   gate->hash              = (void*)&skunkhash;
   gate->scanhash          = (void*)&scanhash_skunk;
   gate->miner_thread_init = (void*)&skunk_thread_init;
   gate->optimizations     = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
   return true;
}

Some files were not shown because too many files have changed in this diff Show More