mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
6 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
de564ccbde | ||
![]() |
fcd7727b0d | ||
![]() |
3dd6787531 | ||
![]() |
cae1ce2ab7 | ||
![]() |
7a91c41d74 | ||
![]() |
c6bc9d67fb |
@@ -55,9 +55,6 @@ cpuminer_SOURCES = \
|
||||
algo/blake/mod_blakecoin.c \
|
||||
algo/blake/blakecoin.c \
|
||||
algo/blake/blakecoin-4way.c \
|
||||
algo/blake/decred-gate.c \
|
||||
algo/blake/decred.c \
|
||||
algo/blake/decred-4way.c \
|
||||
algo/blake/pentablake-gate.c \
|
||||
algo/blake/pentablake-4way.c \
|
||||
algo/blake/pentablake.c \
|
||||
@@ -178,6 +175,8 @@ cpuminer_SOURCES = \
|
||||
algo/sha/sha256t.c \
|
||||
algo/sha/sha256q-4way.c \
|
||||
algo/sha/sha256q.c \
|
||||
algo/sha/sha512256d-4way.c \
|
||||
algo/sha/sha256dt.c \
|
||||
algo/shabal/sph_shabal.c \
|
||||
algo/shabal/shabal-hash-4way.c \
|
||||
algo/shavite/sph_shavite.c \
|
||||
@@ -264,6 +263,8 @@ cpuminer_SOURCES = \
|
||||
algo/x16/x16r-4way.c \
|
||||
algo/x16/x16rv2.c \
|
||||
algo/x16/x16rv2-4way.c \
|
||||
algo/x16/x16rt.c \
|
||||
algo/x16/x16rt-4way.c \
|
||||
algo/x16/hex.c \
|
||||
algo/x16/x21s-4way.c \
|
||||
algo/x16/x21s.c \
|
||||
|
@@ -65,7 +65,58 @@ If not what makes it happen or not happen?
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.22.3
|
||||
v3.22.2
|
||||
|
||||
Added sha512256d & sha256dt algos.
|
||||
Fixed intermittant invalid shares lyra2v2 AVX512.
|
||||
Removed application limits on the number of CPUs and threads, HW and OS limits still apply.
|
||||
Added a log warning if more threads are defined than active CPUs in affinity mask.
|
||||
Improved merkle tree memory management for stratum.
|
||||
Added transaction count to New Work log.
|
||||
Other small improvements.
|
||||
|
||||
v3.22.1
|
||||
|
||||
#393 fixed segfault in GBT, regression from v3.22.0.
|
||||
More efficient 32 bit data interleaving.
|
||||
|
||||
v3.22.0
|
||||
|
||||
Stratum: faster netdiff calculation.
|
||||
Merged a few updates from Pooler/cpuminer:
|
||||
Use CURLOPT_POSTFIELDS in json_rpc_call,
|
||||
Use CURLINFO_ACTIVESOCKET when supported,
|
||||
JSONRPC speedup,
|
||||
Speed up hex2bin function.
|
||||
Small log improvements, notably more frequent hash rate reports.
|
||||
Removed decred algo.
|
||||
|
||||
v3.21.5
|
||||
|
||||
All issues with v3.21.3 & v3.21.4 should be resolved.
|
||||
Changes since v3.21.2:
|
||||
#392 #379 #389 Fixed misaligned address segfault solo mining.
|
||||
#392 Fixed stats for myr-gr algo, and a few others, for CPUs without AVX2.
|
||||
#392 Fixed conditional mining.
|
||||
#392 Fixed cpu affinity on Ryzen CPUs using Windows binaries,
|
||||
Windows binaries no longer support CPU groups,
|
||||
Windows binaries support CPUs with up to 64 threads.
|
||||
Small optimizations to serialized vectoring.
|
||||
|
||||
v3.21.4 CANCELLED
|
||||
|
||||
Reapply selected changes from v3.21.3.
|
||||
#392 #379 #389 Fixed misaligned address segfault solo mining.
|
||||
#392 Fixed conditional mining.
|
||||
#392 Fixed cpu affinity on Ryzen CPUs using Windows binaries,
|
||||
Windows binaries no longer support CPU groups,
|
||||
Windows binaries support CPUs with up to 64 threads.
|
||||
|
||||
v3.21.3.1 UNRELEASED
|
||||
|
||||
Revert to 3.21.2
|
||||
|
||||
v3.21.3 CANCELLED
|
||||
|
||||
#392 #379 #389 Fixed misaligned address segfault solo mining.
|
||||
#392 Fixed stats for myr-gr algo, and a few others, for CPUs without AVX2.
|
||||
@@ -74,10 +125,10 @@ v3.22.3
|
||||
Windows binaries no longer support CPU groups,
|
||||
Windows binaries support CPUs with up to 64 threads.
|
||||
Midstate prehash is now centralized, done only once instead of by every thread
|
||||
for selected algos.
|
||||
for selected algos.
|
||||
Small optimizations to serialized vectoring.
|
||||
|
||||
v3.22.2
|
||||
v3.21.2
|
||||
|
||||
Faster SALSA SIMD shuffle for yespower, yescrypt & scryptn2.
|
||||
Fixed a couple of compiler warnings with gcc-12.
|
||||
|
83
aclocal.m4
vendored
83
aclocal.m4
vendored
@@ -1,6 +1,6 @@
|
||||
# generated automatically by aclocal 1.16.5 -*- Autoconf -*-
|
||||
# generated automatically by aclocal 1.16.1 -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -14,13 +14,13 @@
|
||||
m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])])
|
||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||
m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.71],,
|
||||
[m4_warning([this file was generated for autoconf 2.71.
|
||||
m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],,
|
||||
[m4_warning([this file was generated for autoconf 2.69.
|
||||
You have another version of autoconf. It may work, but is not guaranteed to.
|
||||
If you have problems, you may need to regenerate the build system entirely.
|
||||
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
|
||||
|
||||
# Copyright (C) 2002-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2002-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -35,7 +35,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION],
|
||||
[am__api_version='1.16'
|
||||
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
||||
dnl require some minimum version. Point them to the right macro.
|
||||
m4_if([$1], [1.16.5], [],
|
||||
m4_if([$1], [1.16.1], [],
|
||||
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
||||
])
|
||||
|
||||
@@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
|
||||
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
|
||||
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
|
||||
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
||||
[AM_AUTOMAKE_VERSION([1.16.5])dnl
|
||||
[AM_AUTOMAKE_VERSION([1.16.1])dnl
|
||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
|
||||
|
||||
# Figure out how to run the assembler. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -78,7 +78,7 @@ _AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl
|
||||
|
||||
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -130,7 +130,7 @@ am_aux_dir=`cd "$ac_aux_dir" && pwd`
|
||||
|
||||
# AM_CONDITIONAL -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1997-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -161,7 +161,7 @@ AC_CONFIG_COMMANDS_PRE(
|
||||
Usually this means the macro was only invoked conditionally.]])
|
||||
fi])])
|
||||
|
||||
# Copyright (C) 1999-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -352,7 +352,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
|
||||
|
||||
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1999-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -391,9 +391,7 @@ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
done
|
||||
if test $am_rc -ne 0; then
|
||||
AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments
|
||||
for automatic dependency tracking. If GNU make was not used, consider
|
||||
re-running the configure script with MAKE="gmake" (or whatever is
|
||||
necessary). You can also try re-running configure with the
|
||||
for automatic dependency tracking. Try re-running configure with the
|
||||
'--disable-dependency-tracking' option to at least be able to build
|
||||
the package (albeit without support for automatic dependency tracking).])
|
||||
fi
|
||||
@@ -420,7 +418,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
|
||||
# Do all the work for Automake. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -448,10 +446,6 @@ m4_defn([AC_PROG_CC])
|
||||
# release and drop the old call support.
|
||||
AC_DEFUN([AM_INIT_AUTOMAKE],
|
||||
[AC_PREREQ([2.65])dnl
|
||||
m4_ifdef([_$0_ALREADY_INIT],
|
||||
[m4_fatal([$0 expanded multiple times
|
||||
]m4_defn([_$0_ALREADY_INIT]))],
|
||||
[m4_define([_$0_ALREADY_INIT], m4_expansion_stack)])dnl
|
||||
dnl Autoconf wants to disallow AM_ names. We explicitly allow
|
||||
dnl the ones we care about.
|
||||
m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
|
||||
@@ -488,7 +482,7 @@ m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
|
||||
[_AM_SET_OPTIONS([$1])dnl
|
||||
dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT.
|
||||
m4_if(
|
||||
m4_ifset([AC_PACKAGE_NAME], [ok]):m4_ifset([AC_PACKAGE_VERSION], [ok]),
|
||||
m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]),
|
||||
[ok:ok],,
|
||||
[m4_fatal([AC_INIT should be called with package and version arguments])])dnl
|
||||
AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
|
||||
@@ -540,20 +534,6 @@ AC_PROVIDE_IFELSE([AC_PROG_OBJCXX],
|
||||
[m4_define([AC_PROG_OBJCXX],
|
||||
m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl
|
||||
])
|
||||
# Variables for tags utilities; see am/tags.am
|
||||
if test -z "$CTAGS"; then
|
||||
CTAGS=ctags
|
||||
fi
|
||||
AC_SUBST([CTAGS])
|
||||
if test -z "$ETAGS"; then
|
||||
ETAGS=etags
|
||||
fi
|
||||
AC_SUBST([ETAGS])
|
||||
if test -z "$CSCOPE"; then
|
||||
CSCOPE=cscope
|
||||
fi
|
||||
AC_SUBST([CSCOPE])
|
||||
|
||||
AC_REQUIRE([AM_SILENT_RULES])dnl
|
||||
dnl The testsuite driver may need to know about EXEEXT, so add the
|
||||
dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This
|
||||
@@ -635,7 +615,7 @@ for _am_header in $config_headers :; do
|
||||
done
|
||||
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
||||
|
||||
# Copyright (C) 2001-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -656,7 +636,7 @@ if test x"${install_sh+set}" != xset; then
|
||||
fi
|
||||
AC_SUBST([install_sh])])
|
||||
|
||||
# Copyright (C) 2003-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2003-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -678,7 +658,7 @@ AC_SUBST([am__leading_dot])])
|
||||
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
|
||||
# From Jim Meyering
|
||||
|
||||
# Copyright (C) 1996-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -713,7 +693,7 @@ AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
|
||||
|
||||
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -756,7 +736,7 @@ AC_SUBST([am__quote])])
|
||||
|
||||
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1997-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -777,7 +757,12 @@ AC_DEFUN([AM_MISSING_HAS_RUN],
|
||||
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||
AC_REQUIRE_AUX_FILE([missing])dnl
|
||||
if test x"${MISSING+set}" != xset; then
|
||||
MISSING="\${SHELL} '$am_aux_dir/missing'"
|
||||
case $am_aux_dir in
|
||||
*\ * | *\ *)
|
||||
MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
|
||||
*)
|
||||
MISSING="\${SHELL} $am_aux_dir/missing" ;;
|
||||
esac
|
||||
fi
|
||||
# Use eval to expand $SHELL
|
||||
if eval "$MISSING --is-lightweight"; then
|
||||
@@ -790,7 +775,7 @@ fi
|
||||
|
||||
# Helper functions for option handling. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -819,7 +804,7 @@ AC_DEFUN([_AM_SET_OPTIONS],
|
||||
AC_DEFUN([_AM_IF_OPTION],
|
||||
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
|
||||
|
||||
# Copyright (C) 1999-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -866,7 +851,7 @@ AC_LANG_POP([C])])
|
||||
# For backward compatibility.
|
||||
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
|
||||
|
||||
# Copyright (C) 2001-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -885,7 +870,7 @@ AC_DEFUN([AM_RUN_LOG],
|
||||
|
||||
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -966,7 +951,7 @@ AC_CONFIG_COMMANDS_PRE(
|
||||
rm -f conftest.file
|
||||
])
|
||||
|
||||
# Copyright (C) 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2009-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1026,7 +1011,7 @@ AC_SUBST([AM_BACKSLASH])dnl
|
||||
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
|
||||
])
|
||||
|
||||
# Copyright (C) 2001-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1054,7 +1039,7 @@ fi
|
||||
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
||||
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
||||
|
||||
# Copyright (C) 2006-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2006-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
@@ -1073,7 +1058,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
|
||||
|
||||
# Check how to create a tarball. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2004-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2004-2018 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
|
@@ -253,7 +253,6 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->miner_thread_init = (void*)&return_true;
|
||||
gate->scanhash = (void*)&scanhash_generic;
|
||||
gate->hash = (void*)&null_hash;
|
||||
gate->prehash = (void*)&return_true;
|
||||
gate->get_new_work = (void*)&std_get_new_work;
|
||||
gate->work_decode = (void*)&std_le_work_decode;
|
||||
gate->decode_extra_data = (void*)&do_nothing;
|
||||
@@ -264,8 +263,6 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
gate->build_block_header = (void*)&std_build_block_header;
|
||||
gate->build_extraheader = (void*)&std_build_extraheader;
|
||||
gate->set_work_data_endian = (void*)&do_nothing;
|
||||
gate->calc_network_diff = (void*)&std_calc_network_diff;
|
||||
gate->ready_to_mine = (void*)&std_ready_to_mine;
|
||||
gate->resync_threads = (void*)&do_nothing;
|
||||
gate->do_this_thread = (void*)&return_true;
|
||||
gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call;
|
||||
@@ -309,7 +306,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_BLAKECOIN: rc = register_blakecoin_algo ( gate ); break;
|
||||
case ALGO_BMW512: rc = register_bmw512_algo ( gate ); break;
|
||||
case ALGO_C11: rc = register_c11_algo ( gate ); break;
|
||||
case ALGO_DECRED: rc = register_decred_algo ( gate ); break;
|
||||
case ALGO_DEEP: rc = register_deep_algo ( gate ); break;
|
||||
case ALGO_DMD_GR: rc = register_dmd_gr_algo ( gate ); break;
|
||||
case ALGO_GROESTL: rc = register_groestl_algo ( gate ); break;
|
||||
@@ -341,9 +337,11 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
case ALGO_QUBIT: rc = register_qubit_algo ( gate ); break;
|
||||
case ALGO_SCRYPT: rc = register_scrypt_algo ( gate ); break;
|
||||
case ALGO_SHA256D: rc = register_sha256d_algo ( gate ); break;
|
||||
case ALGO_SHA256DT: rc = register_sha256dt_algo ( gate ); break;
|
||||
case ALGO_SHA256Q: rc = register_sha256q_algo ( gate ); break;
|
||||
case ALGO_SHA256T: rc = register_sha256t_algo ( gate ); break;
|
||||
case ALGO_SHA3D: rc = register_sha3d_algo ( gate ); break;
|
||||
case ALGO_SHA512256D: rc = register_sha512256d_algo ( gate ); break;
|
||||
case ALGO_SHAVITE3: rc = register_shavite_algo ( gate ); break;
|
||||
case ALGO_SKEIN: rc = register_skein_algo ( gate ); break;
|
||||
case ALGO_SKEIN2: rc = register_skein2_algo ( gate ); break;
|
||||
@@ -428,7 +426,6 @@ const char* const algo_alias_map[][2] =
|
||||
{ "blake256r8", "blakecoin" },
|
||||
{ "blake256r8vnl", "vanilla" },
|
||||
{ "blake256r14", "blake" },
|
||||
{ "blake256r14dcr", "decred" },
|
||||
{ "diamond", "dmd-gr" },
|
||||
{ "espers", "hmq1725" },
|
||||
{ "flax", "c11" },
|
||||
|
@@ -119,7 +119,7 @@ typedef struct
|
||||
// to be registered with the gate.
|
||||
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
|
||||
|
||||
int ( *hash ) ( void*, const void*, const int );
|
||||
int ( *hash ) ( void*, const void*, int );
|
||||
|
||||
//optional, safe to use default in most cases
|
||||
|
||||
@@ -127,9 +127,6 @@ int ( *hash ) ( void*, const void*, const int );
|
||||
// other initialization specific to miner threads.
|
||||
bool ( *miner_thread_init ) ( int );
|
||||
|
||||
// Perform prehash after receiving new work
|
||||
int ( *prehash ) ( struct work* );
|
||||
|
||||
// Get thread local copy of blockheader with unique nonce.
|
||||
void ( *get_new_work ) ( struct work*, struct work*, int, uint32_t* );
|
||||
|
||||
@@ -147,7 +144,7 @@ void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
|
||||
void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
|
||||
|
||||
void ( *build_block_header ) ( struct work*, uint32_t, uint32_t*,
|
||||
uint32_t*, uint32_t, uint32_t,
|
||||
uint32_t*, uint32_t, uint32_t,
|
||||
unsigned char* );
|
||||
|
||||
// Build mining.submit message
|
||||
@@ -158,19 +155,13 @@ char* ( *malloc_txs_request ) ( struct work* );
|
||||
// Big endian or little endian
|
||||
void ( *set_work_data_endian ) ( struct work* );
|
||||
|
||||
double ( *calc_network_diff ) ( struct work* );
|
||||
|
||||
// Wait for first work
|
||||
bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
|
||||
|
||||
// Diverge mining threads
|
||||
bool ( *do_this_thread ) ( int );
|
||||
|
||||
// After do_this_thread
|
||||
void ( *resync_threads ) ( int, struct work* );
|
||||
|
||||
// No longer needed
|
||||
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
|
||||
json_t* ( *longpoll_rpc_call ) ( CURL*, int*, char* );
|
||||
|
||||
set_t optimizations;
|
||||
int ( *get_work_data_size ) ();
|
||||
@@ -289,8 +280,6 @@ char* std_malloc_txs_request( struct work *work );
|
||||
// Default is do_nothing, little endian is assumed
|
||||
void set_work_data_big_endian( struct work *work );
|
||||
|
||||
double std_calc_network_diff( struct work *work );
|
||||
|
||||
void std_build_block_header( struct work* g_work, uint32_t version,
|
||||
uint32_t *prevhash, uint32_t *merkle_root,
|
||||
uint32_t ntime, uint32_t nbits,
|
||||
@@ -300,9 +289,6 @@ void std_build_extraheader( struct work *work, struct stratum_ctx *sctx );
|
||||
|
||||
json_t* std_longpoll_rpc_call( CURL *curl, int *err, char *lp_url );
|
||||
|
||||
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id );
|
||||
|
||||
int std_get_work_data_size();
|
||||
|
||||
// Gate admin functions
|
||||
|
@@ -1,6 +1,5 @@
|
||||
#include "blake2s-gate.h"
|
||||
#include "blake2s-hash-4way.h"
|
||||
//#include "sph-blake2s.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
@@ -8,43 +7,6 @@
|
||||
|
||||
static __thread blake2s_16way_state blake2s_16w_ctx;
|
||||
|
||||
/*
|
||||
static blake2s_16way_state blake2s_16w_ctx;
|
||||
static uint32_t blake2s_16way_vdata[20*16] __attribute__ ((aligned (64)));
|
||||
*/
|
||||
/*
|
||||
int blake2s_16way_prehash( struct work *work )
|
||||
{
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
blake2s_state ctx;
|
||||
mm128_bswap32_80( edata, work->data );
|
||||
blake2s_init( &ctx, BLAKE2S_OUTBYTES );
|
||||
ctx.buflen = ctx.t[0] = 64;
|
||||
blake2s_compress( &ctx, (const uint8_t*)edata );
|
||||
|
||||
blake2s_16way_init( &blake2s_16w_ctx, BLAKE2S_OUTBYTES );
|
||||
intrlv_16x32( blake2s_16w_ctx.h, ctx.h, ctx.h, ctx.h, ctx.h,
|
||||
ctx.h, ctx.h, ctx.h, ctx.h,
|
||||
ctx.h, ctx.h, ctx.h, ctx.h,
|
||||
ctx.h, ctx.h, ctx.h, ctx.h, 256 );
|
||||
intrlv_16x32( blake2s_16way_vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata,
|
||||
edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
blake2s_16w_ctx.t[0] = 64;
|
||||
return 1;
|
||||
}
|
||||
*/
|
||||
/*
|
||||
int blake2s_16way_prehash( struct work *work )
|
||||
{
|
||||
mm512_bswap32_intrlv80_16x32( blake2s_16way_vdata, work->data );
|
||||
blake2s_16way_init( &blake2s_16w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_16way_update( &blake2s_16w_ctx, blake2s_16way_vdata, 64 );
|
||||
return 1;
|
||||
}
|
||||
*/
|
||||
|
||||
void blake2s_16way_hash( void *output, const void *input )
|
||||
{
|
||||
blake2s_16way_state ctx;
|
||||
@@ -68,40 +30,10 @@ int scanhash_blake2s_16way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
/*
|
||||
// pthread_rwlock_rdlock( &g_work_lock );
|
||||
memcpy( (__m512i*)vdata +16, (__m512i*)blake2s_16way_vdata +16, 3*4*16 );
|
||||
// casti_m512i( vdata, 16 ) = casti_m512i( blake2s_16way_vdata, 16 );
|
||||
// casti_m512i( vdata, 17 ) = casti_m512i( blake2s_16way_vdata, 17 );
|
||||
// casti_m512i( vdata, 18 ) = casti_m512i( blake2s_16way_vdata, 18 );
|
||||
|
||||
// pthread_rwlock_unlock( &g_work_lock );
|
||||
*/
|
||||
/*
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
blake2s_state ctx;
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
blake2s_init( &ctx, BLAKE2S_OUTBYTES );
|
||||
ctx.buflen = ctx.t[0] = 64;
|
||||
blake2s_compress( &ctx, (const uint8_t*)edata );
|
||||
|
||||
blake2s_16way_init( &blake2s_16w_ctx, BLAKE2S_OUTBYTES );
|
||||
intrlv_16x32( blake2s_16w_ctx.h, ctx.h, ctx.h, ctx.h, ctx.h,
|
||||
ctx.h, ctx.h, ctx.h, ctx.h,
|
||||
ctx.h, ctx.h, ctx.h, ctx.h,
|
||||
ctx.h, ctx.h, ctx.h, ctx.h, 256 );
|
||||
intrlv_16x32( blake2s_16way_blake2s_16way_vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata,
|
||||
edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
blake2s_16w_ctx.t[0] = 64;
|
||||
*/
|
||||
|
||||
mm512_bswap32_intrlv80_16x32( vdata, pdata );
|
||||
blake2s_16way_init( &blake2s_16w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_16way_update( &blake2s_16w_ctx, vdata, 64 );
|
||||
|
||||
|
||||
do {
|
||||
*noncev = mm512_bswap_32( _mm512_set_epi32(
|
||||
n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
|
||||
@@ -131,36 +63,6 @@ int scanhash_blake2s_16way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
static __thread blake2s_8way_state blake2s_8w_ctx;
|
||||
|
||||
/*
|
||||
static blake2s_8way_state blake2s_8w_ctx;
|
||||
static uint32_t blake2s_8way_vdata[20*8] __attribute__ ((aligned (32)));
|
||||
|
||||
int blake2s_8way_prehash( struct work *work )
|
||||
{
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
blake2s_state ctx;
|
||||
mm128_bswap32_80( edata, work->data );
|
||||
blake2s_init( &ctx, BLAKE2S_OUTBYTES );
|
||||
ctx.buflen = ctx.t[0] = 64;
|
||||
blake2s_compress( &ctx, (const uint8_t*)edata );
|
||||
|
||||
blake2s_8way_init( &blake2s_8w_ctx, BLAKE2S_OUTBYTES );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
casti_m256i( blake2s_8w_ctx.h, i ) = _mm256_set1_epi32( ctx.h[i] );
|
||||
|
||||
casti_m256i( blake2s_8way_vdata, 16 ) = _mm256_set1_epi32( edata[16] );
|
||||
casti_m256i( blake2s_8way_vdata, 17 ) = _mm256_set1_epi32( edata[17] );
|
||||
casti_m256i( blake2s_8way_vdata, 18 ) = _mm256_set1_epi32( edata[18] );
|
||||
|
||||
// intrlv_8x32( blake2s_8w_ctx.h, ctx.h, ctx.h, ctx.h, ctx.h,
|
||||
// ctx.h, ctx.h, ctx.h, ctx.h, 256 );
|
||||
// intrlv_8x32( blake2s_8way_vdata, edata, edata, edata, edata,
|
||||
// edata, edata, edata, edata, 640 );
|
||||
blake2s_8w_ctx.t[0] = 64;
|
||||
}
|
||||
*/
|
||||
|
||||
void blake2s_8way_hash( void *output, const void *input )
|
||||
{
|
||||
blake2s_8way_state ctx;
|
||||
@@ -184,41 +86,10 @@ int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
/*
|
||||
// pthread_rwlock_rdlock( &g_work_lock );
|
||||
memcpy( &vdata[16*8], &blake2s_8way_vdata[16*8], 3*4*8 );
|
||||
// pthread_rwlock_unlock( &g_work_lock );
|
||||
*/
|
||||
/*
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
blake2s_state ctx;
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
blake2s_init( &ctx, BLAKE2S_OUTBYTES );
|
||||
ctx.buflen = ctx.t[0] = 64;
|
||||
blake2s_compress( &ctx, (const uint8_t*)edata );
|
||||
|
||||
blake2s_8way_init( &blake2s_8w_ctx, BLAKE2S_OUTBYTES );
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
casti_m256i( blake2s_8w_ctx.h, i ) = _mm256_set1_epi32( ctx.h[i] );
|
||||
|
||||
casti_m256i( vdata, 16 ) = _mm256_set1_epi32( edata[16] );
|
||||
casti_m256i( vdata, 17 ) = _mm256_set1_epi32( edata[17] );
|
||||
casti_m256i( vdata, 18 ) = _mm256_set1_epi32( edata[18] );
|
||||
|
||||
|
||||
// intrlv_8x32( blake2s_8w_ctx.h, ctx.h, ctx.h, ctx.h, ctx.h,
|
||||
// ctx.h, ctx.h, ctx.h, ctx.h, 256 );
|
||||
// intrlv_8x32( vdata, edata, edata, edata, edata,
|
||||
// edata, edata, edata, edata, 640 );
|
||||
|
||||
blake2s_8w_ctx.t[0] = 64;
|
||||
*/
|
||||
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
blake2s_8way_init( &blake2s_8w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_8way_update( &blake2s_8w_ctx, vdata, 64 );
|
||||
|
||||
|
||||
do {
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
@@ -246,25 +117,7 @@ int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce,
|
||||
#elif defined(BLAKE2S_4WAY)
|
||||
|
||||
static __thread blake2s_4way_state blake2s_4w_ctx;
|
||||
/*
|
||||
static blake2s_4way_state blake2s_4w_ctx;
|
||||
static uint32_t blake2s_4way_vdata[20*4] __attribute__ ((aligned (32)));
|
||||
|
||||
int blake2s_4way_prehash( struct work *work )
|
||||
{
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
blake2s_state ctx;
|
||||
mm128_bswap32_80( edata, work->data );
|
||||
blake2s_init( &ctx, BLAKE2S_OUTBYTES );
|
||||
ctx.buflen = ctx.t[0] = 64;
|
||||
blake2s_compress( &ctx, (const uint8_t*)edata );
|
||||
|
||||
blake2s_4way_init( &blake2s_4w_ctx, BLAKE2S_OUTBYTES );
|
||||
intrlv_4x32( blake2s_4w_ctx.h, ctx.h, ctx.h, ctx.h, ctx.h, 256 );
|
||||
intrlv_4x32( blake2s_4way_vdata, edata, edata, edata, edata, 640 );
|
||||
blake2s_4w_ctx.t[0] = 64;
|
||||
}
|
||||
*/
|
||||
void blake2s_4way_hash( void *output, const void *input )
|
||||
{
|
||||
blake2s_4way_state ctx;
|
||||
@@ -287,15 +140,11 @@ int scanhash_blake2s_4way( struct work *work, uint32_t max_nonce,
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id;
|
||||
/*
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
memcpy( vdata, blake2s_4way_vdata, sizeof vdata );
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
*/
|
||||
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake2s_4way_init( &blake2s_4w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_4way_update( &blake2s_4w_ctx, vdata, 64 );
|
||||
|
||||
|
||||
do {
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
|
@@ -5,15 +5,13 @@ bool register_blake2s_algo( algo_gate_t* gate )
|
||||
#if defined(BLAKE2S_16WAY)
|
||||
gate->scanhash = (void*)&scanhash_blake2s_16way;
|
||||
gate->hash = (void*)&blake2s_16way_hash;
|
||||
// gate->prehash = (void*)&blake2s_16way_prehash;
|
||||
#elif defined(BLAKE2S_8WAY)
|
||||
//#if defined(BLAKE2S_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_blake2s_8way;
|
||||
gate->hash = (void*)&blake2s_8way_hash;
|
||||
// gate->prehash = (void*)&blake2s_8way_prehash;
|
||||
#elif defined(BLAKE2S_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_blake2s_4way;
|
||||
gate->hash = (void*)&blake2s_4way_hash;
|
||||
// gate->prehash = (void*)&blake2s_4way_prehash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_blake2s;
|
||||
gate->hash = (void*)&blake2s_hash;
|
||||
|
@@ -23,22 +23,18 @@ bool register_blake2s_algo( algo_gate_t* gate );
|
||||
void blake2s_16way_hash( void *state, const void *input );
|
||||
int scanhash_blake2s_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
int blake2s_16way_prehash( struct work * );
|
||||
|
||||
#elif defined (BLAKE2S_8WAY)
|
||||
|
||||
void blake2s_8way_hash( void *state, const void *input );
|
||||
int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
int blake2s_8way_prehash( struct work * );
|
||||
|
||||
#elif defined (BLAKE2S_4WAY)
|
||||
|
||||
void blake2s_4way_hash( void *state, const void *input );
|
||||
int scanhash_blake2s_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
int blake2s_4way_prehash( struct work * );
|
||||
|
||||
#else
|
||||
|
||||
void blake2s_hash( void *state, const void *input );
|
||||
|
@@ -105,8 +105,8 @@ int blake2s_4way_compress( blake2s_4way_state *S, const __m128i* block )
|
||||
|
||||
#define G4W( sigma0, sigma1, a, b, c, d ) \
|
||||
do { \
|
||||
const uint8_t s0 = sigma0; \
|
||||
const uint8_t s1 = sigma1; \
|
||||
uint8_t s0 = sigma0; \
|
||||
uint8_t s1 = sigma1; \
|
||||
a = _mm_add_epi32( _mm_add_epi32( a, b ), m[ s0 ] ); \
|
||||
d = mm128_swap32_16( _mm_xor_si128( d, a ) ); \
|
||||
c = _mm_add_epi32( c, d ); \
|
||||
@@ -120,7 +120,7 @@ do { \
|
||||
|
||||
#define ROUND4W(r) \
|
||||
do { \
|
||||
const uint8_t *sigma = (const uint8_t*)&blake2s_sigma[r]; \
|
||||
uint8_t *sigma = (uint8_t*)&blake2s_sigma[r]; \
|
||||
G4W( sigma[ 0], sigma[ 1], v[ 0], v[ 4], v[ 8], v[12] ); \
|
||||
G4W( sigma[ 2], sigma[ 3], v[ 1], v[ 5], v[ 9], v[13] ); \
|
||||
G4W( sigma[ 4], sigma[ 5], v[ 2], v[ 6], v[10], v[14] ); \
|
||||
@@ -317,8 +317,8 @@ do { \
|
||||
|
||||
#define G8W( sigma0, sigma1, a, b, c, d) \
|
||||
do { \
|
||||
const uint8_t s0 = sigma0; \
|
||||
const uint8_t s1 = sigma1; \
|
||||
uint8_t s0 = sigma0; \
|
||||
uint8_t s1 = sigma1; \
|
||||
a = _mm256_add_epi32( _mm256_add_epi32( a, b ), m[ s0 ] ); \
|
||||
d = mm256_swap32_16( _mm256_xor_si256( d, a ) ); \
|
||||
c = _mm256_add_epi32( c, d ); \
|
||||
@@ -331,7 +331,7 @@ do { \
|
||||
|
||||
#define ROUND8W(r) \
|
||||
do { \
|
||||
const uint8_t *sigma = (const uint8_t*)&blake2s_sigma[r]; \
|
||||
uint8_t *sigma = (uint8_t*)&blake2s_sigma[r]; \
|
||||
G8W( sigma[ 0], sigma[ 1], v[ 0], v[ 4], v[ 8], v[12] ); \
|
||||
G8W( sigma[ 2], sigma[ 3], v[ 1], v[ 5], v[ 9], v[13] ); \
|
||||
G8W( sigma[ 4], sigma[ 5], v[ 2], v[ 6], v[10], v[14] ); \
|
||||
@@ -529,8 +529,8 @@ int blake2s_16way_compress( blake2s_16way_state *S, const __m512i *block )
|
||||
|
||||
#define G16W( sigma0, sigma1, a, b, c, d) \
|
||||
do { \
|
||||
const uint8_t s0 = sigma0; \
|
||||
const uint8_t s1 = sigma1; \
|
||||
uint8_t s0 = sigma0; \
|
||||
uint8_t s1 = sigma1; \
|
||||
a = _mm512_add_epi32( _mm512_add_epi32( a, b ), m[ s0 ] ); \
|
||||
d = mm512_ror_32( _mm512_xor_si512( d, a ), 16 ); \
|
||||
c = _mm512_add_epi32( c, d ); \
|
||||
@@ -543,7 +543,7 @@ do { \
|
||||
|
||||
#define ROUND16W(r) \
|
||||
do { \
|
||||
const uint8_t *sigma = (const uint8_t*)&blake2s_sigma[r]; \
|
||||
uint8_t *sigma = (uint8_t*)&blake2s_sigma[r]; \
|
||||
G16W( sigma[ 0], sigma[ 1], v[ 0], v[ 4], v[ 8], v[12] ); \
|
||||
G16W( sigma[ 2], sigma[ 3], v[ 1], v[ 5], v[ 9], v[13] ); \
|
||||
G16W( sigma[ 4], sigma[ 5], v[ 2], v[ 6], v[10], v[14] ); \
|
||||
|
@@ -20,7 +20,6 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
//#include "sph-blake2s.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <inttypes.h>
|
||||
@@ -34,7 +33,7 @@
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
/*
|
||||
|
||||
enum blake2s_constant
|
||||
{
|
||||
BLAKE2S_BLOCKBYTES = 64,
|
||||
@@ -43,13 +42,6 @@ enum blake2s_constant
|
||||
BLAKE2S_SALTBYTES = 8,
|
||||
BLAKE2S_PERSONALBYTES = 8
|
||||
};
|
||||
*/
|
||||
|
||||
#define BLAKE2S_BLOCKBYTES 64
|
||||
#define BLAKE2S_OUTBYTES 32
|
||||
#define BLAKE2S_KEYBYTES 32
|
||||
#define BLAKE2S_SALTBYTES 8
|
||||
#define BLAKE2S_PERSONALBYTES 8
|
||||
|
||||
#pragma pack(push, 1)
|
||||
typedef struct __blake2s_nway_param
|
||||
|
@@ -8,6 +8,8 @@
|
||||
#include "sph-blake2s.h"
|
||||
|
||||
static __thread blake2s_state blake2s_ctx;
|
||||
//static __thread blake2s_state s_ctx;
|
||||
#define MIDLEN 76
|
||||
|
||||
void blake2s_hash( void *output, const void *input )
|
||||
{
|
||||
@@ -17,27 +19,37 @@ void blake2s_hash( void *output, const void *input )
|
||||
memcpy( &ctx, &blake2s_ctx, sizeof ctx );
|
||||
blake2s_update( &ctx, input+64, 16 );
|
||||
|
||||
// blake2s_init(&ctx, BLAKE2S_OUTBYTES);
|
||||
// blake2s_update(&ctx, input, 80);
|
||||
blake2s_final( &ctx, hash, BLAKE2S_OUTBYTES );
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
/*
|
||||
static void blake2s_hash_end(uint32_t *output, const uint32_t *input)
|
||||
{
|
||||
s_ctx.buflen = MIDLEN;
|
||||
memcpy(&s_ctx, &s_midstate, 32 + 16 + MIDLEN);
|
||||
blake2s_update(&s_ctx, (uint8_t*) &input[MIDLEN/4], 80 - MIDLEN);
|
||||
blake2s_final(&s_ctx, (uint8_t*) output, BLAKE2S_OUTBYTES);
|
||||
}
|
||||
*/
|
||||
int scanhash_blake2s( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) hash64[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
int thr_id = mythr->id;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
||||
uint32_t n = first_nonce;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
// midstate
|
||||
blake2s_init( &blake2s_ctx, BLAKE2S_OUTBYTES );
|
||||
@@ -46,12 +58,11 @@ int scanhash_blake2s( struct work *work,
|
||||
do {
|
||||
be32enc(&endiandata[19], n);
|
||||
blake2s_hash( hash64, endiandata );
|
||||
if (hash64[7] <= Htarg )
|
||||
if ( fulltest(hash64, ptarget) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
if (hash64[7] <= Htarg && fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return true;
|
||||
}
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
@@ -1,74 +0,0 @@
|
||||
#include "decred-gate.h"
|
||||
#include "blake-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#if defined (DECRED_4WAY)
|
||||
|
||||
static __thread blake256_4way_context blake_mid;
|
||||
|
||||
void decred_hash_4way( void *state, const void *input )
|
||||
{
|
||||
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
|
||||
// uint32_t hash0[8] __attribute__ ((aligned (32)));
|
||||
// uint32_t hash1[8] __attribute__ ((aligned (32)));
|
||||
// uint32_t hash2[8] __attribute__ ((aligned (32)));
|
||||
// uint32_t hash3[8] __attribute__ ((aligned (32)));
|
||||
const void *tail = input + ( DECRED_MIDSTATE_LEN << 2 );
|
||||
int tail_len = 180 - DECRED_MIDSTATE_LEN;
|
||||
blake256_4way_context ctx __attribute__ ((aligned (64)));
|
||||
|
||||
memcpy( &ctx, &blake_mid, sizeof(blake_mid) );
|
||||
blake256_4way_update( &ctx, tail, tail_len );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_decred_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[48*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t _ALIGN(64) edata[48];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
// copy to buffer guaranteed to be aligned.
|
||||
memcpy( edata, pdata, 180 );
|
||||
|
||||
// use the old way until new way updated for size.
|
||||
mm128_intrlv_4x32x( vdata, edata, edata, edata, edata, 180*8 );
|
||||
|
||||
blake256_4way_init( &blake_mid );
|
||||
blake256_4way_update( &blake_mid, vdata, DECRED_MIDSTATE_LEN );
|
||||
|
||||
uint32_t *noncep = vdata + DECRED_NONCE_INDEX * 4;
|
||||
do {
|
||||
* noncep = n;
|
||||
*(noncep+1) = n+1;
|
||||
*(noncep+2) = n+2;
|
||||
*(noncep+3) = n+3;
|
||||
|
||||
decred_hash_4way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[DECRED_NONCE_INDEX] = n+i;
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,171 +0,0 @@
|
||||
#include "decred-gate.h"
|
||||
#include <unistd.h>
|
||||
#include <memory.h>
|
||||
#include <string.h>
|
||||
|
||||
uint32_t *decred_get_nonceptr( uint32_t *work_data )
|
||||
{
|
||||
return &work_data[ DECRED_NONCE_INDEX ];
|
||||
}
|
||||
|
||||
long double decred_calc_network_diff( struct work* work )
|
||||
{
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
uint32_t nbits = work->data[ DECRED_NBITS_INDEX ];
|
||||
uint32_t bits = ( nbits & 0xffffff );
|
||||
int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
|
||||
int m;
|
||||
long double d = (long double)0x0000ffff / (long double)bits;
|
||||
|
||||
for ( m = shift; m < 29; m++ )
|
||||
d *= 256.0;
|
||||
for ( m = 29; m < shift; m++ )
|
||||
d /= 256.0;
|
||||
if ( shift == 28 )
|
||||
d *= 256.0; // testnet
|
||||
if ( opt_debug_diff )
|
||||
applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", (double)d,
|
||||
shift, bits );
|
||||
return net_diff;
|
||||
}
|
||||
|
||||
void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
|
||||
{
|
||||
// some random extradata to make the work unique
|
||||
work->data[ DECRED_XNONCE_INDEX ] = (rand()*4);
|
||||
work->height = work->data[32];
|
||||
if (!have_longpoll && work->height > *net_blocks + 1)
|
||||
{
|
||||
char netinfo[64] = { 0 };
|
||||
if ( net_diff > 0. )
|
||||
{
|
||||
if (net_diff != work->targetdiff)
|
||||
sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
|
||||
work->targetdiff);
|
||||
else
|
||||
sprintf(netinfo, ", diff %.3f", net_diff);
|
||||
}
|
||||
applog(LOG_BLUE, "%s block %d%s", algo_names[opt_algo], work->height,
|
||||
netinfo);
|
||||
*net_blocks = work->height - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void decred_be_build_stratum_request( char *req, struct work *work,
|
||||
struct stratum_ctx *sctx )
|
||||
{
|
||||
unsigned char *xnonce2str;
|
||||
uint32_t ntime, nonce;
|
||||
char ntimestr[9], noncestr[9];
|
||||
|
||||
be32enc( &ntime, work->data[ DECRED_NTIME_INDEX ] );
|
||||
be32enc( &nonce, work->data[ DECRED_NONCE_INDEX ] );
|
||||
bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) );
|
||||
bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) );
|
||||
xnonce2str = abin2hex( (char*)( &work->data[ DECRED_XNONCE_INDEX ] ),
|
||||
sctx->xnonce1_size );
|
||||
snprintf( req, JSON_BUF_LEN,
|
||||
"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free(xnonce2str);
|
||||
}
|
||||
|
||||
#if !defined(min)
|
||||
#define min(a,b) (a>b ? (b) :(a))
|
||||
#endif
|
||||
|
||||
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
uchar merkle_root[64] = { 0 };
|
||||
uint32_t extraheader[32] = { 0 };
|
||||
int headersize = 0;
|
||||
uint32_t* extradata = (uint32_t*) sctx->xnonce1;
|
||||
int i;
|
||||
|
||||
// getwork over stratum, getwork merkle + header passed in coinb1
|
||||
memcpy(merkle_root, sctx->job.coinbase, 32);
|
||||
headersize = min((int)sctx->job.coinbase_size - 32,
|
||||
sizeof(extraheader) );
|
||||
memcpy( extraheader, &sctx->job.coinbase[32], headersize );
|
||||
|
||||
// Assemble block header
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = le32dec( sctx->job.version );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[1 + i] = swab32(
|
||||
le32dec( (uint32_t *) sctx->job.prevhash + i ) );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[9 + i] = swab32( be32dec( (uint32_t *) merkle_root + i ) );
|
||||
|
||||
// for ( i = 0; i < 8; i++ ) // prevhash
|
||||
// g_work->data[1 + i] = swab32( g_work->data[1 + i] );
|
||||
// for ( i = 0; i < 8; i++ ) // merkle
|
||||
// g_work->data[9 + i] = swab32( g_work->data[9 + i] );
|
||||
|
||||
for ( i = 0; i < headersize/4; i++ ) // header
|
||||
g_work->data[17 + i] = extraheader[i];
|
||||
// extradata
|
||||
|
||||
for ( i = 0; i < sctx->xnonce1_size/4; i++ )
|
||||
g_work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
|
||||
for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
|
||||
g_work->data[i] = 0;
|
||||
g_work->data[37] = (rand()*4) << 8;
|
||||
// block header suffix from coinb2 (stake version)
|
||||
memcpy( &g_work->data[44],
|
||||
&sctx->job.coinbase[ sctx->job.coinbase_size-4 ], 4 );
|
||||
sctx->block_height = g_work->data[32];
|
||||
//applog_hex(work->data, 180);
|
||||
//applog_hex(&work->data[36], 36);
|
||||
}
|
||||
|
||||
#undef min
|
||||
|
||||
bool decred_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id )
|
||||
{
|
||||
if ( have_stratum && strcmp(stratum->job.job_id, work->job_id) )
|
||||
// need to regen g_work..
|
||||
return false;
|
||||
if ( have_stratum && !work->data[0] && !opt_benchmark )
|
||||
{
|
||||
sleep(1);
|
||||
return false;
|
||||
}
|
||||
// extradata: prevent duplicates
|
||||
work->data[ DECRED_XNONCE_INDEX ] += 1;
|
||||
work->data[ DECRED_XNONCE_INDEX + 1 ] |= thr_id;
|
||||
return true;
|
||||
}
|
||||
|
||||
int decred_get_work_data_size() { return DECRED_DATA_SIZE; }
|
||||
|
||||
bool register_decred_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(DECRED_4WAY)
|
||||
four_way_not_tested();
|
||||
gate->scanhash = (void*)&scanhash_decred_4way;
|
||||
gate->hash = (void*)&decred_hash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_decred;
|
||||
gate->hash = (void*)&decred_hash;
|
||||
#endif
|
||||
gate->optimizations = AVX2_OPT;
|
||||
// gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->decode_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
gate->build_extraheader = (void*)&decred_build_extraheader;
|
||||
gate->ready_to_mine = (void*)&decred_ready_to_mine;
|
||||
gate->nbits_index = DECRED_NBITS_INDEX;
|
||||
gate->ntime_index = DECRED_NTIME_INDEX;
|
||||
gate->nonce_index = DECRED_NONCE_INDEX;
|
||||
gate->get_work_data_size = (void*)&decred_get_work_data_size;
|
||||
gate->work_cmp_size = DECRED_WORK_COMPARE_SIZE;
|
||||
allow_mininginfo = false;
|
||||
have_gbt = false;
|
||||
return true;
|
||||
}
|
||||
|
@@ -1,36 +0,0 @@
|
||||
#ifndef __DECRED_GATE_H__
|
||||
#define __DECRED_GATE_H__
|
||||
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#define DECRED_NBITS_INDEX 29
|
||||
#define DECRED_NTIME_INDEX 34
|
||||
#define DECRED_NONCE_INDEX 35
|
||||
#define DECRED_XNONCE_INDEX 36
|
||||
#define DECRED_DATA_SIZE 192
|
||||
#define DECRED_WORK_COMPARE_SIZE 140
|
||||
#define DECRED_MIDSTATE_LEN 128
|
||||
|
||||
#if defined (__AVX2__)
|
||||
//void blakehash_84way(void *state, const void *input);
|
||||
//int scanhash_blake_8way( struct work *work, uint32_t max_nonce,
|
||||
// uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
#define DECRED_4WAY
|
||||
#endif
|
||||
|
||||
#if defined (DECRED_4WAY)
|
||||
void decred_hash_4way(void *state, const void *input);
|
||||
int scanhash_decred_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void decred_hash( void *state, const void *input );
|
||||
int scanhash_decred( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
@@ -1,282 +0,0 @@
|
||||
#include "decred-gate.h"
|
||||
|
||||
#if !defined(DECRED_8WAY) && !defined(DECRED_4WAY)
|
||||
|
||||
#include "sph_blake.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <memory.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*
|
||||
#ifndef min
|
||||
#define min(a,b) (a>b ? b : a)
|
||||
#endif
|
||||
#ifndef max
|
||||
#define max(a,b) (a<b ? b : a)
|
||||
#endif
|
||||
*/
|
||||
/*
|
||||
#define DECRED_NBITS_INDEX 29
|
||||
#define DECRED_NTIME_INDEX 34
|
||||
#define DECRED_NONCE_INDEX 35
|
||||
#define DECRED_XNONCE_INDEX 36
|
||||
#define DECRED_DATA_SIZE 192
|
||||
#define DECRED_WORK_COMPARE_SIZE 140
|
||||
*/
|
||||
static __thread sph_blake256_context blake_mid;
|
||||
static __thread bool ctx_midstate_done = false;
|
||||
|
||||
void decred_hash(void *state, const void *input)
|
||||
{
|
||||
// #define MIDSTATE_LEN 128
|
||||
sph_blake256_context ctx __attribute__ ((aligned (64)));
|
||||
|
||||
uint8_t *ending = (uint8_t*) input;
|
||||
ending += DECRED_MIDSTATE_LEN;
|
||||
|
||||
if (!ctx_midstate_done) {
|
||||
sph_blake256_init(&blake_mid);
|
||||
sph_blake256(&blake_mid, input, DECRED_MIDSTATE_LEN);
|
||||
ctx_midstate_done = true;
|
||||
}
|
||||
memcpy(&ctx, &blake_mid, sizeof(blake_mid));
|
||||
|
||||
sph_blake256(&ctx, ending, (180 - DECRED_MIDSTATE_LEN));
|
||||
sph_blake256_close(&ctx, state);
|
||||
}
|
||||
|
||||
void decred_hash_simple(void *state, const void *input)
|
||||
{
|
||||
sph_blake256_context ctx;
|
||||
sph_blake256_init(&ctx);
|
||||
sph_blake256(&ctx, input, 180);
|
||||
sph_blake256_close(&ctx, state);
|
||||
}
|
||||
|
||||
int scanhash_decred( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[48];
|
||||
uint32_t _ALIGN(64) hash32[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
// #define DCR_NONCE_OFT32 35
|
||||
|
||||
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
|
||||
uint32_t n = first_nonce;
|
||||
|
||||
ctx_midstate_done = false;
|
||||
|
||||
#if 1
|
||||
memcpy(endiandata, pdata, 180);
|
||||
#else
|
||||
for (int k=0; k < (180/4); k++)
|
||||
be32enc(&endiandata[k], pdata[k]);
|
||||
#endif
|
||||
|
||||
do {
|
||||
//be32enc(&endiandata[DCR_NONCE_OFT32], n);
|
||||
endiandata[DECRED_NONCE_INDEX] = n;
|
||||
decred_hash(hash32, endiandata);
|
||||
|
||||
if (hash32[7] <= HTarget && fulltest(hash32, ptarget))
|
||||
{
|
||||
pdata[DECRED_NONCE_INDEX] = n;
|
||||
submit_solution( work, hash32, mythr );
|
||||
}
|
||||
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[DECRED_NONCE_INDEX] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
uint32_t *decred_get_nonceptr( uint32_t *work_data )
|
||||
{
|
||||
return &work_data[ DECRED_NONCE_INDEX ];
|
||||
}
|
||||
|
||||
double decred_calc_network_diff( struct work* work )
|
||||
{
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
uint32_t nbits = work->data[ DECRED_NBITS_INDEX ];
|
||||
uint32_t bits = ( nbits & 0xffffff );
|
||||
int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
|
||||
int m;
|
||||
double d = (double)0x0000ffff / (double)bits;
|
||||
|
||||
for ( m = shift; m < 29; m++ )
|
||||
d *= 256.0;
|
||||
for ( m = 29; m < shift; m++ )
|
||||
d /= 256.0;
|
||||
if ( shift == 28 )
|
||||
d *= 256.0; // testnet
|
||||
if ( opt_debug_diff )
|
||||
applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d,
|
||||
shift, bits );
|
||||
return net_diff;
|
||||
}
|
||||
|
||||
void decred_decode_extradata( struct work* work, uint64_t* net_blocks )
|
||||
{
|
||||
// some random extradata to make the work unique
|
||||
work->data[ DECRED_XNONCE_INDEX ] = (rand()*4);
|
||||
work->height = work->data[32];
|
||||
if (!have_longpoll && work->height > *net_blocks + 1)
|
||||
{
|
||||
char netinfo[64] = { 0 };
|
||||
if (net_diff > 0.)
|
||||
{
|
||||
if (net_diff != work->targetdiff)
|
||||
sprintf(netinfo, ", diff %.3f, target %.1f", net_diff,
|
||||
work->targetdiff);
|
||||
else
|
||||
sprintf(netinfo, ", diff %.3f", net_diff);
|
||||
}
|
||||
applog(LOG_BLUE, "%s block %d%s", algo_names[opt_algo], work->height,
|
||||
netinfo);
|
||||
*net_blocks = work->height - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void decred_be_build_stratum_request( char *req, struct work *work,
|
||||
struct stratum_ctx *sctx )
|
||||
{
|
||||
unsigned char *xnonce2str;
|
||||
uint32_t ntime, nonce;
|
||||
char ntimestr[9], noncestr[9];
|
||||
|
||||
be32enc( &ntime, work->data[ DECRED_NTIME_INDEX ] );
|
||||
be32enc( &nonce, work->data[ DECRED_NONCE_INDEX ] );
|
||||
bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) );
|
||||
bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) );
|
||||
xnonce2str = abin2hex( (char*)( &work->data[ DECRED_XNONCE_INDEX ] ),
|
||||
sctx->xnonce1_size );
|
||||
snprintf( req, JSON_BUF_LEN,
|
||||
"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
|
||||
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
|
||||
free(xnonce2str);
|
||||
}
|
||||
*/
|
||||
/*
|
||||
// data shared between gen_merkle_root and build_extraheader.
|
||||
__thread uint32_t decred_extraheader[32] = { 0 };
|
||||
__thread int decred_headersize = 0;
|
||||
|
||||
void decred_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
|
||||
{
|
||||
// getwork over stratum, getwork merkle + header passed in coinb1
|
||||
memcpy(merkle_root, sctx->job.coinbase, 32);
|
||||
decred_headersize = min((int)sctx->job.coinbase_size - 32,
|
||||
sizeof(decred_extraheader) );
|
||||
memcpy( decred_extraheader, &sctx->job.coinbase[32], decred_headersize);
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
#define min(a,b) (a>b ? (b) :(a))
|
||||
|
||||
void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
uchar merkle_root[64] = { 0 };
|
||||
uint32_t extraheader[32] = { 0 };
|
||||
int headersize = 0;
|
||||
uint32_t* extradata = (uint32_t*) sctx->xnonce1;
|
||||
size_t t;
|
||||
int i;
|
||||
|
||||
// getwork over stratum, getwork merkle + header passed in coinb1
|
||||
memcpy(merkle_root, sctx->job.coinbase, 32);
|
||||
headersize = min((int)sctx->job.coinbase_size - 32,
|
||||
sizeof(extraheader) );
|
||||
memcpy( extraheader, &sctx->job.coinbase[32], headersize );
|
||||
|
||||
// Increment extranonce2
|
||||
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
|
||||
|
||||
// Assemble block header
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = le32dec( sctx->job.version );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[1 + i] = swab32(
|
||||
le32dec( (uint32_t *) sctx->job.prevhash + i ) );
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[9 + i] = swab32( be32dec( (uint32_t *) merkle_root + i ) );
|
||||
|
||||
// for ( i = 0; i < 8; i++ ) // prevhash
|
||||
// g_work->data[1 + i] = swab32( g_work->data[1 + i] );
|
||||
// for ( i = 0; i < 8; i++ ) // merkle
|
||||
// g_work->data[9 + i] = swab32( g_work->data[9 + i] );
|
||||
|
||||
for ( i = 0; i < headersize/4; i++ ) // header
|
||||
g_work->data[17 + i] = extraheader[i];
|
||||
// extradata
|
||||
|
||||
for ( i = 0; i < sctx->xnonce1_size/4; i++ )
|
||||
g_work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
|
||||
for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
|
||||
g_work->data[i] = 0;
|
||||
g_work->data[37] = (rand()*4) << 8;
|
||||
// block header suffix from coinb2 (stake version)
|
||||
memcpy( &g_work->data[44],
|
||||
&sctx->job.coinbase[ sctx->job.coinbase_size-4 ], 4 );
|
||||
sctx->bloc_height = g_work->data[32];
|
||||
//applog_hex(work->data, 180);
|
||||
//applog_hex(&work->data[36], 36);
|
||||
}
|
||||
|
||||
#undef min
|
||||
|
||||
bool decred_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id )
|
||||
{
|
||||
if ( have_stratum && strcmp(stratum->job.job_id, work->job_id) )
|
||||
// need to regen g_work..
|
||||
return false;
|
||||
if ( have_stratum && !work->data[0] && !opt_benchmark )
|
||||
{
|
||||
sleep(1);
|
||||
return false;
|
||||
}
|
||||
// extradata: prevent duplicates
|
||||
work->data[ DECRED_XNONCE_INDEX ] += 1;
|
||||
work->data[ DECRED_XNONCE_INDEX + 1 ] |= thr_id;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool register_decred_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_decred;
|
||||
gate->hash = (void*)&decred_hash;
|
||||
gate->get_nonceptr = (void*)&decred_get_nonceptr;
|
||||
gate->decode_extra_data = (void*)&decred_decode_extradata;
|
||||
gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
|
||||
gate->work_decode = (void*)&std_be_work_decode;
|
||||
gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
|
||||
gate->build_extraheader = (void*)&decred_build_extraheader;
|
||||
gate->ready_to_mine = (void*)&decred_ready_to_mine;
|
||||
gate->nbits_index = DECRED_NBITS_INDEX;
|
||||
gate->ntime_index = DECRED_NTIME_INDEX;
|
||||
gate->nonce_index = DECRED_NONCE_INDEX;
|
||||
gate->work_data_size = DECRED_DATA_SIZE;
|
||||
gate->work_cmp_size = DECRED_WORK_COMPARE_SIZE;
|
||||
allow_mininginfo = false;
|
||||
have_gbt = false;
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
||||
#endif
|
@@ -17,7 +17,6 @@
|
||||
|
||||
#include "algo/sha/sph_types.h"
|
||||
#include "sph-blake2s.h"
|
||||
#include "simd-utils.h"
|
||||
|
||||
static const uint32_t blake2s_IV[8] =
|
||||
{
|
||||
@@ -226,71 +225,6 @@ int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES]
|
||||
v[13] = S->t[1] ^ blake2s_IV[5];
|
||||
v[14] = S->f[0] ^ blake2s_IV[6];
|
||||
v[15] = S->f[1] ^ blake2s_IV[7];
|
||||
|
||||
#if 0
|
||||
//#if defined(__SSE2__) // always true
|
||||
|
||||
The only application for this is to do a prehash for the blake2s algorithm.
|
||||
SSE2 also supports 4 way parallel hashing so that is preferred in most cases.
|
||||
Testing has found that using this serial SIMD code for prehash is slower than
|
||||
doing a parallel hash. A parallel hash has more instructions and uses more
|
||||
data. The serial hash uses fewer instructions and data and only needs to
|
||||
interleave the final hash into parallel streams. This has shown negligible
|
||||
improvement on other algos, notably blake256 which is almost identical.
|
||||
Considering the low frequency of prehash no statistically valid change
|
||||
was expected. It was simply better on paper.
|
||||
|
||||
Furthermore, simply defining this macro has an additional negative effect on
|
||||
blake2s as a whole. There are no references to this macro, blake2s-4way does
|
||||
not include it in any header files, it's just another unused macro which should
|
||||
have no effect beyond the preprocessor. But just being visible to the compiler
|
||||
changes things in a dramatic way.
|
||||
|
||||
These 2 things combined reduced the hash rate for blake2s by more than 5% when
|
||||
using serial SIMD for the blake2s prehash over 16way parallel prehash.
|
||||
16way parallel hashing was used in the high frequency nonce loop in both cases.
|
||||
Comsidering the prehash represents 50% of the algorithm and is done once vs
|
||||
the high frequency second half that is done mega, maybe giga, times more it's
|
||||
hard to imagine that big of an effect in either direction.
|
||||
|
||||
#define ROUND( r ) \
|
||||
{ \
|
||||
__m128i *V = (__m128i*)v; \
|
||||
const uint8_t *sigma = blake2s_sigma[r]; \
|
||||
V[0] = _mm_add_epi32( V[0], _mm_add_epi32( V[1], \
|
||||
_mm_set_epi32( m[ sigma[ 6 ] ], m[ sigma[ 4 ] ], \
|
||||
m[ sigma[ 2 ] ], m[ sigma[ 0 ] ] ) ) ); \
|
||||
V[3] = mm128_swap32_16( _mm_xor_si128( V[3], V[0] ) ); \
|
||||
V[2] = _mm_add_epi32( V[2], V[3] ); \
|
||||
V[1] = mm128_ror_32( _mm_xor_si128( V[1], V[2] ), 12 ); \
|
||||
V[0] = _mm_add_epi32( V[0], _mm_add_epi32( V[1], \
|
||||
_mm_set_epi32( m[ sigma[ 7 ] ], m[ sigma[ 5 ] ], \
|
||||
m[ sigma[ 3 ] ], m[ sigma[ 1 ] ] ) ) ); \
|
||||
V[3] = mm128_shuflr32_8( _mm_xor_si128( V[3], V[0] ) ); \
|
||||
V[2] = _mm_add_epi32( V[2], V[3] ); \
|
||||
V[1] = mm128_ror_32( _mm_xor_si128( V[1], V[2] ), 7 ); \
|
||||
V[3] = mm128_shufll_32( V[3] ); \
|
||||
V[2] = mm128_swap_64( V[2] ); \
|
||||
V[1] = mm128_shuflr_32( V[1] ); \
|
||||
V[0] = _mm_add_epi32( V[0], _mm_add_epi32( V[1], \
|
||||
_mm_set_epi32( m[ sigma[14] ], m[ sigma[12] ], \
|
||||
m[ sigma[10] ], m[ sigma[ 8] ] ) ) ); \
|
||||
V[3] = mm128_swap32_16( _mm_xor_si128( V[3], V[0] ) ); \
|
||||
V[2] = _mm_add_epi32( V[2], V[3] ); \
|
||||
V[1] = mm128_ror_32( _mm_xor_si128( V[1], V[2] ), 12 ); \
|
||||
V[0] = _mm_add_epi32( V[0], _mm_add_epi32( V[1], \
|
||||
_mm_set_epi32( m[ sigma[15] ], m[ sigma[13] ], \
|
||||
m[ sigma[11] ], m[ sigma[ 9] ] ) ) ); \
|
||||
V[3] = mm128_shuflr32_8( _mm_xor_si128( V[3], V[0] ) ); \
|
||||
V[2] = _mm_add_epi32( V[2], V[3] ); \
|
||||
V[1] = mm128_ror_32( _mm_xor_si128( V[1], V[2] ), 7 ); \
|
||||
V[3] = mm128_shuflr_32( V[3] ); \
|
||||
V[2] = mm128_swap_64( V[2] ); \
|
||||
V[1] = mm128_shufll_32( V[1] ); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define G(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = a + b + m[blake2s_sigma[r][2*i+0]]; \
|
||||
@@ -313,10 +247,7 @@ hard to imagine that big of an effect in either direction.
|
||||
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
||||
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
||||
} while(0)
|
||||
|
||||
#endif
|
||||
|
||||
ROUND( 0 );
|
||||
ROUND( 0 );
|
||||
ROUND( 1 );
|
||||
ROUND( 2 );
|
||||
ROUND( 3 );
|
||||
|
@@ -91,7 +91,6 @@ static inline void secure_zero_memory(void *v, size_t n)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
enum blake2s_constant
|
||||
{
|
||||
BLAKE2S_BLOCKBYTES = 64,
|
||||
@@ -100,13 +99,6 @@ extern "C" {
|
||||
BLAKE2S_SALTBYTES = 8,
|
||||
BLAKE2S_PERSONALBYTES = 8
|
||||
};
|
||||
*/
|
||||
|
||||
#define BLAKE2S_BLOCKBYTES 64
|
||||
#define BLAKE2S_OUTBYTES 32
|
||||
#define BLAKE2S_KEYBYTES 32
|
||||
#define BLAKE2S_SALTBYTES 8
|
||||
#define BLAKE2S_PERSONALBYTES 8
|
||||
|
||||
#pragma pack(push, 1)
|
||||
typedef struct __blake2s_param
|
||||
|
@@ -554,20 +554,10 @@ int luffa_4way_update_close( luffa_4way_context *state,
|
||||
a = _mm256_xor_si256( a, c0 ); \
|
||||
b = _mm256_xor_si256( b, c1 );
|
||||
|
||||
/*
|
||||
#define MULT2( a0, a1, mask ) \
|
||||
do { \
|
||||
__m256i b = _mm256_xor_si256( a0, \
|
||||
_mm256_shuffle_epi32( _mm256_and_si256(a1,mask), 16 ) ); \
|
||||
a0 = _mm256_or_si256( _mm256_srli_si256(b,4), _mm256_slli_si256(a1,12) ); \
|
||||
a1 = _mm256_or_si256( _mm256_srli_si256(a1,4), _mm256_slli_si256(b,12) ); \
|
||||
} while(0)
|
||||
*/
|
||||
|
||||
#define MULT2( a0, a1, mask ) \
|
||||
#define MULT2( a0, a1 ) \
|
||||
{ \
|
||||
__m256i b = _mm256_xor_si256( a0, \
|
||||
_mm256_shuffle_epi32( _mm256_and_si256( a1, mask ), 16 ) ); \
|
||||
__m256i b = _mm256_xor_si256( a0, _mm256_shuffle_epi32( \
|
||||
_mm256_blend_epi32( a1, m256_zero, 0xee ), 16 ) ); \
|
||||
a0 = _mm256_alignr_epi8( a1, b, 4 ); \
|
||||
a1 = _mm256_alignr_epi8( b, a1, 4 ); \
|
||||
}
|
||||
@@ -682,7 +672,6 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
__m256i *chainv = state->chainv;
|
||||
__m256i msg0, msg1;
|
||||
__m256i x0, x1, x2, x3, x4, x5, x6, x7;
|
||||
const __m256i MASK = m256_const1_i128( 0xffffffff );
|
||||
|
||||
t0 = chainv[0];
|
||||
t1 = chainv[1];
|
||||
@@ -696,7 +685,7 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
t0 = _mm256_xor_si256( t0, chainv[8] );
|
||||
t1 = _mm256_xor_si256( t1, chainv[9] );
|
||||
|
||||
MULT2( t0, t1, MASK );
|
||||
MULT2( t0, t1 );
|
||||
|
||||
msg0 = _mm256_shuffle_epi32( msg[0], 27 );
|
||||
msg1 = _mm256_shuffle_epi32( msg[1], 27 );
|
||||
@@ -715,66 +704,66 @@ void rnd512_2way( luffa_2way_context *state, __m256i *msg )
|
||||
t0 = chainv[0];
|
||||
t1 = chainv[1];
|
||||
|
||||
MULT2( chainv[0], chainv[1], MASK );
|
||||
MULT2( chainv[0], chainv[1] );
|
||||
chainv[0] = _mm256_xor_si256( chainv[0], chainv[2] );
|
||||
chainv[1] = _mm256_xor_si256( chainv[1], chainv[3] );
|
||||
|
||||
MULT2( chainv[2], chainv[3], MASK );
|
||||
MULT2( chainv[2], chainv[3] );
|
||||
chainv[2] = _mm256_xor_si256(chainv[2], chainv[4]);
|
||||
chainv[3] = _mm256_xor_si256(chainv[3], chainv[5]);
|
||||
|
||||
MULT2( chainv[4], chainv[5], MASK );
|
||||
MULT2( chainv[4], chainv[5] );
|
||||
chainv[4] = _mm256_xor_si256(chainv[4], chainv[6]);
|
||||
chainv[5] = _mm256_xor_si256(chainv[5], chainv[7]);
|
||||
|
||||
MULT2( chainv[6], chainv[7], MASK );
|
||||
MULT2( chainv[6], chainv[7] );
|
||||
chainv[6] = _mm256_xor_si256(chainv[6], chainv[8]);
|
||||
chainv[7] = _mm256_xor_si256(chainv[7], chainv[9]);
|
||||
|
||||
MULT2( chainv[8], chainv[9], MASK );
|
||||
MULT2( chainv[8], chainv[9] );
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], t0 );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], t1 );
|
||||
|
||||
t0 = chainv[8];
|
||||
t1 = chainv[9];
|
||||
|
||||
MULT2( chainv[8], chainv[9], MASK );
|
||||
MULT2( chainv[8], chainv[9] );
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], chainv[6] );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], chainv[7] );
|
||||
|
||||
MULT2( chainv[6], chainv[7], MASK );
|
||||
MULT2( chainv[6], chainv[7] );
|
||||
chainv[6] = _mm256_xor_si256( chainv[6], chainv[4] );
|
||||
chainv[7] = _mm256_xor_si256( chainv[7], chainv[5] );
|
||||
|
||||
MULT2( chainv[4], chainv[5], MASK );
|
||||
MULT2( chainv[4], chainv[5] );
|
||||
chainv[4] = _mm256_xor_si256( chainv[4], chainv[2] );
|
||||
chainv[5] = _mm256_xor_si256( chainv[5], chainv[3] );
|
||||
|
||||
MULT2( chainv[2], chainv[3], MASK );
|
||||
MULT2( chainv[2], chainv[3] );
|
||||
chainv[2] = _mm256_xor_si256( chainv[2], chainv[0] );
|
||||
chainv[3] = _mm256_xor_si256( chainv[3], chainv[1] );
|
||||
|
||||
MULT2( chainv[0], chainv[1], MASK );
|
||||
MULT2( chainv[0], chainv[1] );
|
||||
chainv[0] = _mm256_xor_si256( _mm256_xor_si256( chainv[0], t0 ), msg0 );
|
||||
chainv[1] = _mm256_xor_si256( _mm256_xor_si256( chainv[1], t1 ), msg1 );
|
||||
|
||||
MULT2( msg0, msg1, MASK );
|
||||
MULT2( msg0, msg1 );
|
||||
chainv[2] = _mm256_xor_si256( chainv[2], msg0 );
|
||||
chainv[3] = _mm256_xor_si256( chainv[3], msg1 );
|
||||
|
||||
MULT2( msg0, msg1, MASK );
|
||||
MULT2( msg0, msg1 );
|
||||
chainv[4] = _mm256_xor_si256( chainv[4], msg0 );
|
||||
chainv[5] = _mm256_xor_si256( chainv[5], msg1 );
|
||||
|
||||
MULT2( msg0, msg1, MASK );
|
||||
MULT2( msg0, msg1 );
|
||||
chainv[6] = _mm256_xor_si256( chainv[6], msg0 );
|
||||
chainv[7] = _mm256_xor_si256( chainv[7], msg1 );
|
||||
|
||||
MULT2( msg0, msg1, MASK );
|
||||
MULT2( msg0, msg1 );
|
||||
chainv[8] = _mm256_xor_si256( chainv[8], msg0 );
|
||||
chainv[9] = _mm256_xor_si256( chainv[9], msg1 );
|
||||
|
||||
MULT2( msg0, msg1, MASK );
|
||||
MULT2( msg0, msg1 );
|
||||
|
||||
chainv[3] = mm256_rol_32( chainv[3], 1 );
|
||||
chainv[5] = mm256_rol_32( chainv[5], 2 );
|
||||
|
@@ -24,45 +24,6 @@ typedef union {
|
||||
#endif
|
||||
} allium_16way_ctx_holder;
|
||||
|
||||
static uint32_t allium_16way_midstate_vars[16*16] __attribute__ ((aligned (64)));
|
||||
static __m512i allium_16way_block0_hash[8] __attribute__ ((aligned (64)));
|
||||
static __m512i allium_16way_block_buf[16] __attribute__ ((aligned (64)));
|
||||
|
||||
int allium_16way_prehash( struct work *work )
|
||||
{
|
||||
uint32_t phash[8] __attribute__ ((aligned (32))) =
|
||||
{
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
uint32_t *pdata = work->data;
|
||||
|
||||
// Prehash first block.
|
||||
blake256_transform_le( phash, pdata, 512, 0 );
|
||||
|
||||
// Interleave hash for second block prehash.
|
||||
allium_16way_block0_hash[0] = _mm512_set1_epi32( phash[0] );
|
||||
allium_16way_block0_hash[1] = _mm512_set1_epi32( phash[1] );
|
||||
allium_16way_block0_hash[2] = _mm512_set1_epi32( phash[2] );
|
||||
allium_16way_block0_hash[3] = _mm512_set1_epi32( phash[3] );
|
||||
allium_16way_block0_hash[4] = _mm512_set1_epi32( phash[4] );
|
||||
allium_16way_block0_hash[5] = _mm512_set1_epi32( phash[5] );
|
||||
allium_16way_block0_hash[6] = _mm512_set1_epi32( phash[6] );
|
||||
allium_16way_block0_hash[7] = _mm512_set1_epi32( phash[7] );
|
||||
|
||||
// Build vectored second block, interleave 12 of last 16 bytes of data,
|
||||
// excluding the nonce.
|
||||
allium_16way_block_buf[ 0] = _mm512_set1_epi32( pdata[16] );
|
||||
allium_16way_block_buf[ 1] = _mm512_set1_epi32( pdata[17] );
|
||||
allium_16way_block_buf[ 2] = _mm512_set1_epi32( pdata[18] );
|
||||
|
||||
// Partialy prehash second block without touching nonces in block_buf[3].
|
||||
blake256_16way_round0_prehash_le( allium_16way_midstate_vars,
|
||||
allium_16way_block0_hash, allium_16way_block_buf );
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void allium_16way_hash( void *state, const void *midstate_vars,
|
||||
const void *midhash, const void *block )
|
||||
{
|
||||
@@ -239,6 +200,11 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t midstate_vars[16*16] __attribute__ ((aligned (64)));
|
||||
__m512i block0_hash[8] __attribute__ ((aligned (64)));
|
||||
__m512i block_buf[16] __attribute__ ((aligned (64)));
|
||||
uint32_t phash[8] __attribute__ ((aligned (32))) =
|
||||
{
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
@@ -250,19 +216,31 @@ int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
if ( bench ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
// Prehash first block.
|
||||
blake256_transform_le( phash, pdata, 512, 0 );
|
||||
|
||||
memcpy( midstate_vars, allium_16way_midstate_vars, sizeof midstate_vars );
|
||||
memcpy( block0_hash, allium_16way_block0_hash, sizeof block0_hash );
|
||||
memcpy( block_buf, allium_16way_block_buf, sizeof block_buf );
|
||||
// Interleave hash for second block prehash.
|
||||
block0_hash[0] = _mm512_set1_epi32( phash[0] );
|
||||
block0_hash[1] = _mm512_set1_epi32( phash[1] );
|
||||
block0_hash[2] = _mm512_set1_epi32( phash[2] );
|
||||
block0_hash[3] = _mm512_set1_epi32( phash[3] );
|
||||
block0_hash[4] = _mm512_set1_epi32( phash[4] );
|
||||
block0_hash[5] = _mm512_set1_epi32( phash[5] );
|
||||
block0_hash[6] = _mm512_set1_epi32( phash[6] );
|
||||
block0_hash[7] = _mm512_set1_epi32( phash[7] );
|
||||
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
|
||||
// fill in the nonces
|
||||
block_buf[3] =
|
||||
// Build vectored second block, interleave last 16 bytes of data using
|
||||
// unique nonces.
|
||||
block_buf[ 0] = _mm512_set1_epi32( pdata[16] );
|
||||
block_buf[ 1] = _mm512_set1_epi32( pdata[17] );
|
||||
block_buf[ 2] = _mm512_set1_epi32( pdata[18] );
|
||||
block_buf[ 3] =
|
||||
_mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+ 1, n );
|
||||
|
||||
|
||||
// Partialy prehash second block without touching nonces in block_buf[3].
|
||||
blake256_16way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
allium_16way_hash( hash, midstate_vars, block0_hash, block_buf );
|
||||
|
||||
@@ -293,44 +271,6 @@ typedef union {
|
||||
#endif
|
||||
} allium_8way_ctx_holder;
|
||||
|
||||
static uint32_t allium_8way_midstate_vars[16*8] __attribute__ ((aligned (64)));
|
||||
static __m256i allium_8way_block0_hash[8] __attribute__ ((aligned (64)));
|
||||
static __m256i allium_8way_block_buf[16] __attribute__ ((aligned (64)));
|
||||
|
||||
int allium_8way_prehash ( struct work *work )
|
||||
{
|
||||
uint32_t phash[8] __attribute__ ((aligned (32))) =
|
||||
{
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
uint32_t *pdata = work->data;
|
||||
|
||||
// Prehash first block
|
||||
blake256_transform_le( phash, pdata, 512, 0 );
|
||||
|
||||
allium_8way_block0_hash[0] = _mm256_set1_epi32( phash[0] );
|
||||
allium_8way_block0_hash[1] = _mm256_set1_epi32( phash[1] );
|
||||
allium_8way_block0_hash[2] = _mm256_set1_epi32( phash[2] );
|
||||
allium_8way_block0_hash[3] = _mm256_set1_epi32( phash[3] );
|
||||
allium_8way_block0_hash[4] = _mm256_set1_epi32( phash[4] );
|
||||
allium_8way_block0_hash[5] = _mm256_set1_epi32( phash[5] );
|
||||
allium_8way_block0_hash[6] = _mm256_set1_epi32( phash[6] );
|
||||
allium_8way_block0_hash[7] = _mm256_set1_epi32( phash[7] );
|
||||
|
||||
// Build vectored second block, interleave 12 of the last 16 bytes,
|
||||
// excepting the nonces.
|
||||
allium_8way_block_buf[ 0] = _mm256_set1_epi32( pdata[16] );
|
||||
allium_8way_block_buf[ 1] = _mm256_set1_epi32( pdata[17] );
|
||||
allium_8way_block_buf[ 2] = _mm256_set1_epi32( pdata[18] );
|
||||
|
||||
// Partialy prehash second block without touching nonces
|
||||
blake256_8way_round0_prehash_le( allium_8way_midstate_vars,
|
||||
allium_8way_block0_hash, allium_8way_block_buf );
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void allium_8way_hash( void *hash, const void *midstate_vars,
|
||||
const void *midhash, const void *block )
|
||||
{
|
||||
@@ -446,6 +386,11 @@ int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t midstate_vars[16*8] __attribute__ ((aligned (64)));
|
||||
__m256i block0_hash[8] __attribute__ ((aligned (64)));
|
||||
__m256i block_buf[16] __attribute__ ((aligned (64)));
|
||||
uint32_t phash[8] __attribute__ ((aligned (32))) =
|
||||
{
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
uint32_t *pdata = work->data;
|
||||
uint64_t *ptarget = (uint64_t*)work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
@@ -455,17 +400,29 @@ int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i eight = m256_const1_32( 8 );
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
// Prehash first block
|
||||
blake256_transform_le( phash, pdata, 512, 0 );
|
||||
|
||||
memcpy( midstate_vars, allium_8way_midstate_vars, sizeof midstate_vars );
|
||||
memcpy( block0_hash, allium_8way_block0_hash, sizeof block0_hash );
|
||||
memcpy( block_buf, allium_8way_block_buf, sizeof block_buf );
|
||||
block0_hash[0] = _mm256_set1_epi32( phash[0] );
|
||||
block0_hash[1] = _mm256_set1_epi32( phash[1] );
|
||||
block0_hash[2] = _mm256_set1_epi32( phash[2] );
|
||||
block0_hash[3] = _mm256_set1_epi32( phash[3] );
|
||||
block0_hash[4] = _mm256_set1_epi32( phash[4] );
|
||||
block0_hash[5] = _mm256_set1_epi32( phash[5] );
|
||||
block0_hash[6] = _mm256_set1_epi32( phash[6] );
|
||||
block0_hash[7] = _mm256_set1_epi32( phash[7] );
|
||||
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
|
||||
// Build vectored second block, interleave last 16 bytes of data using
|
||||
// unique nonces.
|
||||
block_buf[ 0] = _mm256_set1_epi32( pdata[16] );
|
||||
block_buf[ 1] = _mm256_set1_epi32( pdata[17] );
|
||||
block_buf[ 2] = _mm256_set1_epi32( pdata[18] );
|
||||
block_buf[ 3] = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4,
|
||||
n+ 3, n+ 2, n+ 1, n );
|
||||
|
||||
|
||||
// Partialy prehash second block without touching nonces
|
||||
blake256_8way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
allium_8way_hash( hash, midstate_vars, block0_hash, block_buf );
|
||||
|
||||
@@ -481,7 +438,6 @@ int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
n += 8;
|
||||
block_buf[ 3] = _mm256_add_epi32( block_buf[ 3], eight );
|
||||
} while ( likely( (n <= last_nonce) && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
|
@@ -131,12 +131,10 @@ bool register_lyra2z_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(LYRA2Z_16WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_16way_thread_init;
|
||||
gate->prehash = (void*)&lyra2z_16way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_16way;
|
||||
// gate->hash = (void*)&lyra2z_16way_hash;
|
||||
#elif defined(LYRA2Z_8WAY)
|
||||
gate->miner_thread_init = (void*)&lyra2z_8way_thread_init;
|
||||
gate->prehash = (void*)&lyra2z_8way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_lyra2z_8way;
|
||||
// gate->hash = (void*)&lyra2z_8way_hash;
|
||||
#elif defined(LYRA2Z_4WAY)
|
||||
@@ -177,10 +175,8 @@ bool register_lyra2h_algo( algo_gate_t* gate )
|
||||
bool register_allium_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (ALLIUM_16WAY)
|
||||
gate->prehash = (void*)&allium_16way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_allium_16way;
|
||||
#elif defined (ALLIUM_8WAY)
|
||||
gate->prehash = (void*)&allium_8way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_allium_8way;
|
||||
#else
|
||||
gate->miner_thread_init = (void*)&init_allium_ctx;
|
||||
|
@@ -5,6 +5,7 @@
|
||||
#include <stdint.h>
|
||||
#include "lyra2.h"
|
||||
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define LYRA2REV3_16WAY 1
|
||||
#elif defined(__AVX2__)
|
||||
@@ -101,7 +102,6 @@ bool init_lyra2rev2_ctx();
|
||||
//void lyra2z_16way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
int lyra2z_16way_prehash ( struct work *work );
|
||||
bool lyra2z_16way_thread_init();
|
||||
|
||||
#elif defined(LYRA2Z_8WAY)
|
||||
@@ -110,7 +110,6 @@ bool lyra2z_16way_thread_init();
|
||||
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2z_8way_thread_init();
|
||||
int lyra2z_8way_prehash ( struct work *work );
|
||||
|
||||
#elif defined(LYRA2Z_4WAY)
|
||||
|
||||
@@ -166,13 +165,11 @@ bool register_allium_algo( algo_gate_t* gate );
|
||||
|
||||
int scanhash_allium_16way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
int allium_16way_prehash ( struct work *work );
|
||||
|
||||
#elif defined(ALLIUM_8WAY)
|
||||
|
||||
int scanhash_allium_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
int allium_8way_prehash ( struct work *work );
|
||||
|
||||
#else
|
||||
|
||||
|
@@ -75,7 +75,7 @@ void lyra2rev2_16way_hash( void *state, const void *input )
|
||||
keccak256_8way_close( &ctx.keccak, vhash );
|
||||
|
||||
dintrlv_8x64( hash8, hash9, hash10, hash11,
|
||||
hash12, hash13, hash14, hash5, vhash, 256 );
|
||||
hash12, hash13, hash14, hash15, vhash, 256 );
|
||||
|
||||
cubehash_full( &ctx.cube, (byte*) hash0, 256, (const byte*) hash0, 32 );
|
||||
cubehash_full( &ctx.cube, (byte*) hash1, 256, (const byte*) hash1, 32 );
|
||||
|
@@ -14,44 +14,6 @@ bool lyra2z_16way_thread_init()
|
||||
return ( lyra2z_16way_matrix = _mm_malloc( 2*LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static uint32_t lyra2z_16way_midstate_vars[16*16] __attribute__ ((aligned (64)));
|
||||
static __m512i lyra2z_16way_block0_hash[8] __attribute__ ((aligned (64)));
|
||||
static __m512i lyra2z_16way_block_buf[16] __attribute__ ((aligned (64)));
|
||||
|
||||
int lyra2z_16way_prehash ( struct work *work )
|
||||
{
|
||||
uint32_t phash[8] __attribute__ ((aligned (32))) =
|
||||
{
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
uint32_t *pdata = work->data;
|
||||
|
||||
// Prehash first block
|
||||
blake256_transform_le( phash, pdata, 512, 0 );
|
||||
|
||||
lyra2z_16way_block0_hash[0] = _mm512_set1_epi32( phash[0] );
|
||||
lyra2z_16way_block0_hash[1] = _mm512_set1_epi32( phash[1] );
|
||||
lyra2z_16way_block0_hash[2] = _mm512_set1_epi32( phash[2] );
|
||||
lyra2z_16way_block0_hash[3] = _mm512_set1_epi32( phash[3] );
|
||||
lyra2z_16way_block0_hash[4] = _mm512_set1_epi32( phash[4] );
|
||||
lyra2z_16way_block0_hash[5] = _mm512_set1_epi32( phash[5] );
|
||||
lyra2z_16way_block0_hash[6] = _mm512_set1_epi32( phash[6] );
|
||||
lyra2z_16way_block0_hash[7] = _mm512_set1_epi32( phash[7] );
|
||||
|
||||
// Build vectored second block, interleave 12 of last 16 bytes of data
|
||||
// excepting the nonce.
|
||||
lyra2z_16way_block_buf[ 0] = _mm512_set1_epi32( pdata[16] );
|
||||
lyra2z_16way_block_buf[ 1] = _mm512_set1_epi32( pdata[17] );
|
||||
lyra2z_16way_block_buf[ 2] = _mm512_set1_epi32( pdata[18] );
|
||||
|
||||
// Partialy prehash second block without touching nonces in block_buf[3].
|
||||
blake256_16way_round0_prehash_le( lyra2z_16way_midstate_vars,
|
||||
lyra2z_16way_block0_hash, lyra2z_16way_block_buf );
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void lyra2z_16way_hash( void *state, const void *midstate_vars,
|
||||
const void *midhash, const void *block )
|
||||
{
|
||||
@@ -129,6 +91,11 @@ int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t midstate_vars[16*16] __attribute__ ((aligned (64)));
|
||||
__m512i block0_hash[8] __attribute__ ((aligned (64)));
|
||||
__m512i block_buf[16] __attribute__ ((aligned (64)));
|
||||
uint32_t phash[8] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
@@ -140,18 +107,30 @@ int scanhash_lyra2z_16way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
if ( bench ) ( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
// Prehash first block
|
||||
blake256_transform_le( phash, pdata, 512, 0 );
|
||||
|
||||
memcpy( midstate_vars, lyra2z_16way_midstate_vars, sizeof midstate_vars );
|
||||
memcpy( block0_hash, lyra2z_16way_block0_hash, sizeof block0_hash );
|
||||
memcpy( block_buf, lyra2z_16way_block_buf, sizeof block_buf );
|
||||
block0_hash[0] = _mm512_set1_epi32( phash[0] );
|
||||
block0_hash[1] = _mm512_set1_epi32( phash[1] );
|
||||
block0_hash[2] = _mm512_set1_epi32( phash[2] );
|
||||
block0_hash[3] = _mm512_set1_epi32( phash[3] );
|
||||
block0_hash[4] = _mm512_set1_epi32( phash[4] );
|
||||
block0_hash[5] = _mm512_set1_epi32( phash[5] );
|
||||
block0_hash[6] = _mm512_set1_epi32( phash[6] );
|
||||
block0_hash[7] = _mm512_set1_epi32( phash[7] );
|
||||
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
|
||||
// Build vectored second block, interleave last 16 bytes of data using
|
||||
// unique nonces.
|
||||
block_buf[ 0] = _mm512_set1_epi32( pdata[16] );
|
||||
block_buf[ 1] = _mm512_set1_epi32( pdata[17] );
|
||||
block_buf[ 2] = _mm512_set1_epi32( pdata[18] );
|
||||
block_buf[ 3] =
|
||||
_mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+ 9, n+ 8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
|
||||
|
||||
// Partialy prehash second block without touching nonces in block_buf[3].
|
||||
blake256_16way_round0_prehash_le( midstate_vars, block0_hash, block_buf );
|
||||
|
||||
do {
|
||||
lyra2z_16way_hash( hash, midstate_vars, block0_hash, block_buf );
|
||||
|
||||
@@ -178,44 +157,6 @@ bool lyra2z_8way_thread_init()
|
||||
return ( lyra2z_8way_matrix = _mm_malloc( LYRA2Z_MATRIX_SIZE, 64 ) );
|
||||
}
|
||||
|
||||
static uint32_t lyra2z_8way_midstate_vars[16*8] __attribute__ ((aligned (64)));
|
||||
static __m256i lyra2z_8way_block0_hash[8] __attribute__ ((aligned (64)));
|
||||
static __m256i lyra2z_8way_block_buf[16] __attribute__ ((aligned (64)));
|
||||
|
||||
int lyra2z_8way_prehash ( struct work *work )
|
||||
{
|
||||
uint32_t phash[8] __attribute__ ((aligned (32))) =
|
||||
{
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
uint32_t *pdata = work->data;
|
||||
|
||||
// Prehash first block
|
||||
blake256_transform_le( phash, pdata, 512, 0 );
|
||||
|
||||
lyra2z_8way_block0_hash[0] = _mm256_set1_epi32( phash[0] );
|
||||
lyra2z_8way_block0_hash[1] = _mm256_set1_epi32( phash[1] );
|
||||
lyra2z_8way_block0_hash[2] = _mm256_set1_epi32( phash[2] );
|
||||
lyra2z_8way_block0_hash[3] = _mm256_set1_epi32( phash[3] );
|
||||
lyra2z_8way_block0_hash[4] = _mm256_set1_epi32( phash[4] );
|
||||
lyra2z_8way_block0_hash[5] = _mm256_set1_epi32( phash[5] );
|
||||
lyra2z_8way_block0_hash[6] = _mm256_set1_epi32( phash[6] );
|
||||
lyra2z_8way_block0_hash[7] = _mm256_set1_epi32( phash[7] );
|
||||
|
||||
// Build vectored second block, interleave last 16 bytes of data using
|
||||
// unique nonces.
|
||||
lyra2z_8way_block_buf[ 0] = _mm256_set1_epi32( pdata[16] );
|
||||
lyra2z_8way_block_buf[ 1] = _mm256_set1_epi32( pdata[17] );
|
||||
lyra2z_8way_block_buf[ 2] = _mm256_set1_epi32( pdata[18] );
|
||||
|
||||
// Partialy prehash second block without touching nonces
|
||||
blake256_8way_round0_prehash_le( lyra2z_8way_midstate_vars,
|
||||
lyra2z_8way_block0_hash, lyra2z_8way_block_buf );
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void lyra2z_8way_hash( void *state, const void *midstate_vars,
|
||||
const void *midhash, const void *block )
|
||||
{
|
||||
@@ -260,6 +201,11 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
uint32_t midstate_vars[16*8] __attribute__ ((aligned (64)));
|
||||
__m256i block0_hash[8] __attribute__ ((aligned (64)));
|
||||
__m256i block_buf[16] __attribute__ ((aligned (64)));
|
||||
uint32_t phash[8] __attribute__ ((aligned (32))) =
|
||||
{
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
uint32_t *pdata = work->data;
|
||||
uint64_t *ptarget = (uint64_t*)work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
@@ -269,14 +215,23 @@ int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i eight = m256_const1_32( 8 );
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
// Prehash first block
|
||||
blake256_transform_le( phash, pdata, 512, 0 );
|
||||
|
||||
memcpy( midstate_vars, lyra2z_8way_midstate_vars, sizeof midstate_vars );
|
||||
memcpy( block0_hash, lyra2z_8way_block0_hash, sizeof block0_hash );
|
||||
memcpy( block_buf, lyra2z_8way_block_buf, sizeof block_buf );
|
||||
block0_hash[0] = _mm256_set1_epi32( phash[0] );
|
||||
block0_hash[1] = _mm256_set1_epi32( phash[1] );
|
||||
block0_hash[2] = _mm256_set1_epi32( phash[2] );
|
||||
block0_hash[3] = _mm256_set1_epi32( phash[3] );
|
||||
block0_hash[4] = _mm256_set1_epi32( phash[4] );
|
||||
block0_hash[5] = _mm256_set1_epi32( phash[5] );
|
||||
block0_hash[6] = _mm256_set1_epi32( phash[6] );
|
||||
block0_hash[7] = _mm256_set1_epi32( phash[7] );
|
||||
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
|
||||
// Build vectored second block, interleave last 16 bytes of data using
|
||||
// unique nonces.
|
||||
block_buf[ 0] = _mm256_set1_epi32( pdata[16] );
|
||||
block_buf[ 1] = _mm256_set1_epi32( pdata[17] );
|
||||
block_buf[ 2] = _mm256_set1_epi32( pdata[18] );
|
||||
block_buf[ 3] =
|
||||
_mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n +1, n );
|
||||
|
||||
|
@@ -4,24 +4,6 @@
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
long double lbry_calc_network_diff( struct work *work )
|
||||
{
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
|
||||
uint32_t nbits = swab32( work->data[ LBRY_NBITS_INDEX ] );
|
||||
uint32_t bits = (nbits & 0xffffff);
|
||||
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
|
||||
long double d = (long double)0x0000ffff / (long double)bits;
|
||||
|
||||
for (int m=shift; m < 29; m++) d *= 256.0;
|
||||
for (int m=29; m < shift; m++) d /= 256.0;
|
||||
if (opt_debug_diff)
|
||||
applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits);
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
// std_le should work but it doesn't
|
||||
void lbry_le_build_stratum_request( char *req, struct work *work,
|
||||
struct stratum_ctx *sctx )
|
||||
@@ -41,31 +23,6 @@ void lbry_le_build_stratum_request( char *req, struct work *work,
|
||||
free(xnonce2str);
|
||||
}
|
||||
|
||||
/*
|
||||
void lbry_build_block_header( struct work* g_work, uint32_t version,
|
||||
uint32_t *prevhash, uint32_t *merkle_root,
|
||||
uint32_t ntime, uint32_t nbits )
|
||||
{
|
||||
int i;
|
||||
memset( g_work->data, 0, sizeof(g_work->data) );
|
||||
g_work->data[0] = version;
|
||||
|
||||
if ( have_stratum )
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[1 + i] = le32dec( prevhash + i );
|
||||
else
|
||||
for (i = 0; i < 8; i++)
|
||||
g_work->data[ 8-i ] = le32dec( prevhash + i );
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
g_work->data[9 + i] = be32dec( merkle_root + i );
|
||||
|
||||
g_work->data[ LBRY_NTIME_INDEX ] = ntime;
|
||||
g_work->data[ LBRY_NBITS_INDEX ] = nbits;
|
||||
g_work->data[28] = 0x80000000;
|
||||
}
|
||||
*/
|
||||
|
||||
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
{
|
||||
unsigned char merkle_root[64] = { 0 };
|
||||
@@ -112,9 +69,7 @@ bool register_lbry_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&lbry_hash;
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||
#endif
|
||||
gate->calc_network_diff = (void*)&lbry_calc_network_diff;
|
||||
gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
|
||||
// gate->build_block_header = (void*)&build_block_header;
|
||||
gate->build_extraheader = (void*)&lbry_build_extraheader;
|
||||
gate->ntime_index = LBRY_NTIME_INDEX;
|
||||
gate->nbits_index = LBRY_NBITS_INDEX;
|
||||
|
268
algo/sha/sha256dt.c
Normal file
268
algo/sha/sha256dt.c
Normal file
@@ -0,0 +1,268 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "sha-hash-4way.h"
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define SHA256DT_16WAY 1
|
||||
#elif defined(__AVX2__)
|
||||
#define SHA256DT_8WAY 1
|
||||
#else
|
||||
#define SHA256DT_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(SHA256DT_16WAY)
|
||||
|
||||
int scanhash_sha256dt_16way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
__m512i vdata[32] __attribute__ ((aligned (128)));
|
||||
__m512i block[16] __attribute__ ((aligned (64)));
|
||||
__m512i hash32[8] __attribute__ ((aligned (64)));
|
||||
__m512i initstate[8] __attribute__ ((aligned (64)));
|
||||
__m512i midstate1[8] __attribute__ ((aligned (64)));
|
||||
__m512i midstate2[8] __attribute__ ((aligned (64)));
|
||||
__m512i mexp_pre[16] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
const __m512i last_byte = m512_const1_32( 0x80000000 );
|
||||
const __m512i sixteen = m512_const1_32( 16 );
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
vdata[i] = mm512_bcast_i32( pdata[i] );
|
||||
|
||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||
|
||||
vdata[16+4] = last_byte;
|
||||
memset_zero_512( vdata+16 + 5, 10 );
|
||||
vdata[16+15] = mm512_bcast_i32( 0x480 );
|
||||
|
||||
block[ 8] = last_byte;
|
||||
memset_zero_512( block + 9, 6 );
|
||||
block[15] = mm512_bcast_i32( 0x300 );
|
||||
|
||||
initstate[0] = mm512_bcast_i64( 0xdfa9bf2cdfa9bf2c );
|
||||
initstate[1] = mm512_bcast_i64( 0xb72074d4b72074d4 );
|
||||
initstate[2] = mm512_bcast_i64( 0x6bb011226bb01122 );
|
||||
initstate[3] = mm512_bcast_i64( 0xd338e869d338e869 );
|
||||
initstate[4] = mm512_bcast_i64( 0xaa3ff126aa3ff126 );
|
||||
initstate[5] = mm512_bcast_i64( 0x475bbf30475bbf30 );
|
||||
initstate[6] = mm512_bcast_i64( 0x8fd52e5b8fd52e5b );
|
||||
initstate[7] = mm512_bcast_i64( 0x9f75c9ad9f75c9ad );
|
||||
|
||||
sha256_16way_transform_le( midstate1, vdata, initstate );
|
||||
|
||||
// Do 3 rounds on the first 12 bytes of the next block
|
||||
sha256_16way_prehash_3rounds( midstate2, mexp_pre, vdata+16, midstate1 );
|
||||
|
||||
do
|
||||
{
|
||||
sha256_16way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||
mexp_pre );
|
||||
sha256_16way_transform_le( hash32, block, initstate );
|
||||
mm512_block_bswap_32( hash32, hash32 );
|
||||
|
||||
for ( int lane = 0; lane < 16; lane++ )
|
||||
if ( hash32_d7[ lane ] <= targ32_d7 )
|
||||
{
|
||||
extr_lane_16x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev, sixteen );
|
||||
n += 16;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SHA256DT_8WAY)
|
||||
|
||||
int scanhash_sha256dt_8way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
__m256i vdata[32] __attribute__ ((aligned (64)));
|
||||
__m256i block[16] __attribute__ ((aligned (32)));
|
||||
__m256i hash32[8] __attribute__ ((aligned (32)));
|
||||
__m256i initstate[8] __attribute__ ((aligned (32)));
|
||||
__m256i midstate1[8] __attribute__ ((aligned (32)));
|
||||
__m256i midstate2[8] __attribute__ ((aligned (32)));
|
||||
__m256i mexp_pre[16] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i last_byte = m256_const1_32( 0x80000000 );
|
||||
const __m256i eight = m256_const1_32( 8 );
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
vdata[i] = mm256_bcast_i32( pdata[i] );
|
||||
|
||||
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||
|
||||
vdata[16+4] = last_byte;
|
||||
memset_zero_256( vdata+16 + 5, 10 );
|
||||
vdata[16+15] = mm256_bcast_i32( 0x480 );
|
||||
|
||||
block[ 8] = last_byte;
|
||||
memset_zero_256( block + 9, 6 );
|
||||
block[15] = mm256_bcast_i32( 0x300 );
|
||||
|
||||
// initialize state
|
||||
initstate[0] = mm256_bcast_i64( 0xdfa9bf2cdfa9bf2c );
|
||||
initstate[1] = mm256_bcast_i64( 0xb72074d4b72074d4 );
|
||||
initstate[2] = mm256_bcast_i64( 0x6bb011226bb01122 );
|
||||
initstate[3] = mm256_bcast_i64( 0xd338e869d338e869 );
|
||||
initstate[4] = mm256_bcast_i64( 0xaa3ff126aa3ff126 );
|
||||
initstate[5] = mm256_bcast_i64( 0x475bbf30475bbf30 );
|
||||
initstate[6] = mm256_bcast_i64( 0x8fd52e5b8fd52e5b );
|
||||
initstate[7] = mm256_bcast_i64( 0x9f75c9ad9f75c9ad );
|
||||
|
||||
sha256_8way_transform_le( midstate1, vdata, initstate );
|
||||
|
||||
// Do 3 rounds on the first 12 bytes of the next block
|
||||
sha256_8way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
|
||||
|
||||
do
|
||||
{
|
||||
sha256_8way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||
mexp_pre );
|
||||
sha256_8way_transform_le( hash32, block, initstate );
|
||||
mm256_block_bswap_32( hash32, hash32 );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( hash32_d7[ lane ] <= targ32_d7 )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev, eight );
|
||||
n += 8;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(SHA256DT_4WAY)
|
||||
|
||||
int scanhash_sha256dt_4way( struct work *work, const uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
__m128i vdata[32] __attribute__ ((aligned (64)));
|
||||
__m128i block[16] __attribute__ ((aligned (32)));
|
||||
__m128i hash32[8] __attribute__ ((aligned (32)));
|
||||
__m128i initstate[8] __attribute__ ((aligned (32)));
|
||||
__m128i midstate[8] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||
uint32_t *pdata = work->data;
|
||||
const uint32_t *ptarget = work->target;
|
||||
const uint32_t targ32_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
__m128i *noncev = vdata + 19;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
const __m128i last_byte = m128_const1_32( 0x80000000 );
|
||||
const __m128i four = m128_const1_32( 4 );
|
||||
|
||||
for ( int i = 0; i < 19; i++ )
|
||||
vdata[i] = mm128_bcast_i32( pdata[i] );
|
||||
|
||||
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
||||
|
||||
vdata[16+4] = last_byte;
|
||||
memset_zero_128( vdata+16 + 5, 10 );
|
||||
vdata[16+15] = mm128_bcast_i32( 0x480 );
|
||||
|
||||
block[ 8] = last_byte;
|
||||
memset_zero_128( block + 9, 6 );
|
||||
block[15] = mm128_bcast_i32( 0x300 );
|
||||
|
||||
// initialize state
|
||||
initstate[0] = mm128_bcast_i64( 0xdfa9bf2cdfa9bf2c );
|
||||
initstate[1] = mm128_bcast_i64( 0xb72074d4b72074d4 );
|
||||
initstate[2] = mm128_bcast_i64( 0x6bb011226bb01122 );
|
||||
initstate[3] = mm128_bcast_i64( 0xd338e869d338e869 );
|
||||
initstate[4] = mm128_bcast_i64( 0xaa3ff126aa3ff126 );
|
||||
initstate[5] = mm128_bcast_i64( 0x475bbf30475bbf30 );
|
||||
initstate[6] = mm128_bcast_i64( 0x8fd52e5b8fd52e5b );
|
||||
initstate[7] = mm128_bcast_i64( 0x9f75c9ad9f75c9ad );
|
||||
|
||||
// hash first 64 bytes of data
|
||||
sha256_4way_transform_le( midstate, vdata, initstate );
|
||||
|
||||
do
|
||||
{
|
||||
sha256_4way_transform_le( block, vdata+16, midstate );
|
||||
sha256_4way_transform_le( hash32, block, initstate );
|
||||
mm128_block_bswap_32( hash32, hash32 );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||
{
|
||||
extr_lane_4x32( lane_hash, hash32, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm_add_epi32( *noncev, four );
|
||||
n += 4;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
bool register_sha256dt_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||
#if defined(SHA256DT_16WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256dt_16way;
|
||||
#elif defined(SHA256DT_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha256dt_8way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_sha256dt_4way;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
221
algo/sha/sha512256d-4way.c
Normal file
221
algo/sha/sha512256d-4way.c
Normal file
@@ -0,0 +1,221 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include "sha-hash-4way.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define SHA512256D_8WAY 1
|
||||
#elif defined(__AVX2__)
|
||||
#define SHA512256D_4WAY 1
|
||||
#endif
|
||||
|
||||
#if defined(SHA512256D_8WAY)
|
||||
|
||||
static void sha512256d_8way_init( sha512_8way_context *ctx )
|
||||
{
|
||||
ctx->count = 0;
|
||||
ctx->initialized = true;
|
||||
ctx->val[0] = mm512_bcast_i64( 0x22312194FC2BF72C );
|
||||
ctx->val[1] = mm512_bcast_i64( 0x9F555FA3C84C64C2 );
|
||||
ctx->val[2] = mm512_bcast_i64( 0x2393B86B6F53B151 );
|
||||
ctx->val[3] = mm512_bcast_i64( 0x963877195940EABD );
|
||||
ctx->val[4] = mm512_bcast_i64( 0x96283EE2A88EFFE3 );
|
||||
ctx->val[5] = mm512_bcast_i64( 0xBE5E1E2553863992 );
|
||||
ctx->val[6] = mm512_bcast_i64( 0x2B0199FC2C85B8AA );
|
||||
ctx->val[7] = mm512_bcast_i64( 0x0EB72DDC81C52CA2 );
|
||||
}
|
||||
|
||||
int scanhash_sha512256d_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint64_t hash[8*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
sha512_8way_context ctx;
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint64_t *hash_q3 = &(hash[3*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint64_t targ_q3 = ((uint64_t*)ptarget)[3];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
const __m512i eight = mm512_bcast_i64( 0x0000000800000000 );
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
sha512256d_8way_init( &ctx );
|
||||
sha512_8way_update( &ctx, vdata, 80 );
|
||||
sha512_8way_close( &ctx, hash );
|
||||
|
||||
sha512256d_8way_init( &ctx );
|
||||
sha512_8way_update( &ctx, hash, 32 );
|
||||
sha512_8way_close( &ctx, hash );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash_q3[ lane ] <= targ_q3 && !bench ) )
|
||||
{
|
||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) && !bench )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev, eight );
|
||||
n += 8;
|
||||
} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );
|
||||
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined(SHA512256D_4WAY)
|
||||
|
||||
static void sha512256d_4way_init( sha512_4way_context *ctx )
|
||||
{
|
||||
ctx->count = 0;
|
||||
ctx->initialized = true;
|
||||
ctx->val[0] = mm256_bcast_i64( 0x22312194FC2BF72C );
|
||||
ctx->val[1] = mm256_bcast_i64( 0x9F555FA3C84C64C2 );
|
||||
ctx->val[2] = mm256_bcast_i64( 0x2393B86B6F53B151 );
|
||||
ctx->val[3] = mm256_bcast_i64( 0x963877195940EABD );
|
||||
ctx->val[4] = mm256_bcast_i64( 0x96283EE2A88EFFE3 );
|
||||
ctx->val[5] = mm256_bcast_i64( 0xBE5E1E2553863992 );
|
||||
ctx->val[6] = mm256_bcast_i64( 0x2B0199FC2C85B8AA );
|
||||
ctx->val[7] = mm256_bcast_i64( 0x0EB72DDC81C52CA2 );
|
||||
}
|
||||
|
||||
int scanhash_sha512256d_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint64_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
sha512_4way_context ctx;
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint64_t *hash_q3 = &(hash[3*4]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint64_t targ_q3 = ((uint64_t*)ptarget)[3];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
const __m256i four = mm256_bcast_i64( 0x0000000400000000 );
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
sha512256d_4way_init( &ctx );
|
||||
sha512_4way_update( &ctx, vdata, 80 );
|
||||
sha512_4way_close( &ctx, hash );
|
||||
|
||||
sha512256d_4way_init( &ctx );
|
||||
sha512_4way_update( &ctx, hash, 32 );
|
||||
sha512_4way_close( &ctx, hash );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash_q3[ lane ] <= targ_q3 )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) && !bench )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev, four );
|
||||
n += 4;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include "sph_sha2.h"
|
||||
|
||||
static const uint64_t H512_256[8] =
|
||||
{
|
||||
0x22312194FC2BF72C, 0x9F555FA3C84C64C2,
|
||||
0x2393B86B6F53B151, 0x963877195940EABD,
|
||||
0x96283EE2A88EFFE3, 0xBE5E1E2553863992,
|
||||
0x2B0199FC2C85B8AA, 0x0EB72DDC81C52CA2,
|
||||
};
|
||||
|
||||
static void sha512256d_init( sph_sha512_context *ctx )
|
||||
{
|
||||
memcpy( ctx->val, H512_256, sizeof H512_256 );
|
||||
ctx->count = 0;
|
||||
}
|
||||
|
||||
int scanhash_sha512256d( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t hash64[8] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
sph_sha512_context ctx;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], n );
|
||||
|
||||
sha512256d_init( &ctx );
|
||||
sph_sha512( &ctx, endiandata, 80 );
|
||||
sph_sha512_close( &ctx, hash64 );
|
||||
|
||||
sha512256d_init( &ctx );
|
||||
sph_sha512( &ctx, hash64, 32 );
|
||||
sph_sha512_close( &ctx, hash64 );
|
||||
|
||||
if ( hash64[7] <= Htarg )
|
||||
if ( fulltest( hash64, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
bool register_sha512256d_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
#if defined(SHA512256D_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha512256d_8way;
|
||||
#elif defined(SHA512256D_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha512256d_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_sha512256d;
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
|
@@ -7,16 +7,8 @@
|
||||
|
||||
#if defined (SKEIN_8WAY)
|
||||
|
||||
static skein512_8way_context skein512_8way_ctx
|
||||
static __thread skein512_8way_context skein512_8way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
static uint32_t skein_8way_vdata[20*8] __attribute__ ((aligned (64)));
|
||||
|
||||
int skein_8way_prehash( struct work *work )
|
||||
{
|
||||
mm512_bswap32_intrlv80_8x64( skein_8way_vdata, work->data );
|
||||
skein512_8way_prehash64( &skein512_8way_ctx, skein_8way_vdata );
|
||||
return 1;
|
||||
}
|
||||
|
||||
void skeinhash_8way( void *state, const void *input )
|
||||
{
|
||||
@@ -37,27 +29,25 @@ void skeinhash_8way( void *state, const void *input )
|
||||
int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash_d7 = &(hash[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t targ_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
memcpy( vdata, skein_8way_vdata, sizeof vdata );
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash_d7 = &(hash[7*8]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t targ_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
skein512_8way_prehash64( &skein512_8way_ctx, vdata );
|
||||
do
|
||||
{
|
||||
skeinhash_8way( hash, vdata );
|
||||
@@ -84,16 +74,8 @@ int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined (SKEIN_4WAY)
|
||||
|
||||
static skein512_4way_context skein512_4way_ctx
|
||||
static __thread skein512_4way_context skein512_4way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
static uint32_t skein_4way_vdata[20*4] __attribute__ ((aligned (64)));
|
||||
|
||||
int skein_4way_prehash( struct work *work )
|
||||
{
|
||||
mm256_bswap32_intrlv80_4x64( skein_4way_vdata, work->data );
|
||||
skein512_4way_prehash64( &skein512_4way_ctx, skein_4way_vdata );
|
||||
return 1;
|
||||
}
|
||||
|
||||
void skeinhash_4way( void *state, const void *input )
|
||||
{
|
||||
@@ -136,24 +118,23 @@ void skeinhash_4way( void *state, const void *input )
|
||||
int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash_d7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t targ_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash_d7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t targ_d7 = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &skein512_4way_ctx, vdata );
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
memcpy( vdata, skein_4way_vdata, sizeof vdata );
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
|
@@ -7,12 +7,10 @@ bool register_skein_algo( algo_gate_t* gate )
|
||||
#if defined (SKEIN_8WAY)
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein_8way;
|
||||
gate->prehash = (void*)&skein_8way_prehash;
|
||||
gate->hash = (void*)&skeinhash_8way;
|
||||
#elif defined (SKEIN_4WAY)
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_skein_4way;
|
||||
gate->prehash = (void*)&skein_4way_prehash;
|
||||
gate->hash = (void*)&skeinhash_4way;
|
||||
#else
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT | SHA_OPT;
|
||||
@@ -27,12 +25,10 @@ bool register_skein2_algo( algo_gate_t* gate )
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
#if defined (SKEIN_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_skein2_8way;
|
||||
// gate->hash = (void*)&skein2hash_8way;
|
||||
gate->prehash = (void*)&skein2_8way_prehash;
|
||||
gate->hash = (void*)&skein2hash_8way;
|
||||
#elif defined (SKEIN_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_skein2_4way;
|
||||
// gate->hash = (void*)&skein2hash_4way;
|
||||
gate->prehash = (void*)&skein2_4way_prehash;
|
||||
gate->hash = (void*)&skein2hash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_skein2;
|
||||
gate->hash = (void*)&skein2hash;
|
||||
|
@@ -14,24 +14,20 @@
|
||||
void skeinhash_8way( void *output, const void *input );
|
||||
int scanhash_skein_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
int skein_8way_prehash( struct work * );
|
||||
|
||||
void skein2hash_8way( void *output, const void *input );
|
||||
int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t* hashes_done, struct thr_info *mythr );
|
||||
int skein2_8way_prehash( struct work * );
|
||||
|
||||
#elif defined(SKEIN_4WAY)
|
||||
|
||||
void skeinhash_4way( void *output, const void *input );
|
||||
int scanhash_skein_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
int skein_4way_prehash( struct work * );
|
||||
|
||||
void skein2hash_4way( void *output, const void *input );
|
||||
int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t* hashes_done, struct thr_info *mythr );
|
||||
int skein2_4way_prehash( struct work * );
|
||||
|
||||
#else
|
||||
|
||||
|
@@ -5,17 +5,9 @@
|
||||
|
||||
#if defined(SKEIN_8WAY)
|
||||
|
||||
static skein512_8way_context skein512_8way_ctx __attribute__ ((aligned (64)));
|
||||
static uint32_t skein2_8way_vdata[20*8] __attribute__ ((aligned (64)));
|
||||
static __thread skein512_8way_context skein512_8way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
int skein2_8way_prehash( struct work *work )
|
||||
{
|
||||
mm512_bswap32_intrlv80_8x64( skein2_8way_vdata, work->data );
|
||||
skein512_8way_prehash64( &skein512_8way_ctx, skein2_8way_vdata );
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* not used
|
||||
void skein2hash_8way( void *output, const void *input )
|
||||
{
|
||||
uint64_t hash[16*8] __attribute__ ((aligned (128)));
|
||||
@@ -25,7 +17,6 @@ void skein2hash_8way( void *output, const void *input )
|
||||
skein512_8way_final16( &ctx, hash, input + (64*8) );
|
||||
skein512_8way_full( &ctx, output, hash, 64 );
|
||||
}
|
||||
*/
|
||||
|
||||
int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
@@ -45,14 +36,11 @@ int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
skein512_8way_context ctx;
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
memcpy( vdata, skein2_8way_vdata, sizeof vdata );
|
||||
memcpy( &ctx, &skein512_8way_ctx, sizeof ctx );
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
skein512_8way_prehash64( &ctx, vdata );
|
||||
do
|
||||
{
|
||||
skein512_8way_final16( &ctx, hash, vdata + (16*8) );
|
||||
@@ -79,18 +67,10 @@ int scanhash_skein2_8way( struct work *work, uint32_t max_nonce,
|
||||
}
|
||||
|
||||
#elif defined(SKEIN_4WAY)
|
||||
|
||||
static skein512_4way_context skein512_4way_ctx __attribute__ ((aligned (64)));
|
||||
static uint32_t skein2_4way_vdata[20*4] __attribute__ ((aligned (64)));
|
||||
|
||||
int skein2_4way_prehash( struct work *work )
|
||||
{
|
||||
mm256_bswap32_intrlv80_4x64( skein2_4way_vdata, work->data );
|
||||
skein512_4way_prehash64( &skein512_4way_ctx, skein2_4way_vdata );
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* not used
|
||||
static __thread skein512_4way_context skein512_4way_ctx
|
||||
__attribute__ ((aligned (64)));
|
||||
|
||||
void skein2hash_4way( void *output, const void *input )
|
||||
{
|
||||
skein512_4way_context ctx;
|
||||
@@ -100,7 +80,6 @@ void skein2hash_4way( void *output, const void *input )
|
||||
skein512_4way_final16( &ctx, hash, input + (64*4) );
|
||||
skein512_4way_full( &ctx, output, hash, 64 );
|
||||
}
|
||||
*/
|
||||
|
||||
int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
@@ -120,11 +99,8 @@ int scanhash_skein2_4way( struct work *work, uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
skein512_4way_context ctx;
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
memcpy( vdata, skein2_4way_vdata, sizeof vdata );
|
||||
memcpy( &ctx, &skein512_4way_ctx, sizeof ctx );
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &ctx, vdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
|
@@ -25,7 +25,7 @@ static void hex_getAlgoString(const uint32_t* prevblock, char *output)
|
||||
|
||||
static __thread x16r_context_overlay hex_ctx;
|
||||
|
||||
int hex_hash( void* output, const void* input, const int thrid )
|
||||
int hex_hash( void* output, const void* input, int thrid )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[16];
|
||||
x16r_context_overlay ctx;
|
||||
|
@@ -72,7 +72,7 @@ struct TortureGarden
|
||||
|
||||
// Get a 64-byte hash for given 64-byte input, using given TortureGarden contexts and given algo index
|
||||
static int get_hash( void *output, const void *input, TortureGarden *garden,
|
||||
unsigned int algo, const int thr_id )
|
||||
unsigned int algo, int thr_id )
|
||||
{
|
||||
unsigned char hash[64] __attribute__ ((aligned (64)));
|
||||
int rc = 1;
|
||||
@@ -233,7 +233,7 @@ bool initialize_torture_garden()
|
||||
}
|
||||
|
||||
// Produce a 32-byte hash from 80-byte input data
|
||||
int minotaur_hash( void *output, const void *input, const int thr_id )
|
||||
int minotaur_hash( void *output, const void *input, int thr_id )
|
||||
{
|
||||
unsigned char hash[64] __attribute__ ((aligned (64)));
|
||||
int rc = 1;
|
||||
|
@@ -19,7 +19,7 @@
|
||||
// Perform midstate prehash of hash functions with block size <= 72 bytes,
|
||||
// 76 bytes for hash functions that operate on 32 bit data.
|
||||
|
||||
void x16r_8way_do_prehash( void *vdata, const void *pdata )
|
||||
void x16r_8way_prehash( void *vdata, void *pdata )
|
||||
{
|
||||
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
@@ -106,18 +106,11 @@ void x16r_8way_do_prehash( void *vdata, const void *pdata )
|
||||
}
|
||||
}
|
||||
|
||||
int x16r_8way_prehash( struct work *work )
|
||||
{
|
||||
x16r_gate_get_hash_order( work, x16r_hash_order );
|
||||
x16r_8way_do_prehash( x16r_8way_vdata, work->data );
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Perform the full x16r hash and returns 512 bit intermediate hash.
|
||||
// Called by wrapper hash function to optionally continue hashing and
|
||||
// convert to final hash.
|
||||
|
||||
int x16r_8way_hash_generic( void* output, const void* input, const int thrid )
|
||||
int x16r_8way_hash_generic( void* output, const void* input, int thrid )
|
||||
{
|
||||
uint32_t vhash[20*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[20] __attribute__ ((aligned (16)));
|
||||
@@ -478,7 +471,7 @@ int x16r_8way_hash_generic( void* output, const void* input, const int thrid )
|
||||
|
||||
// x16-r,-s,-rt wrapper called directly by scanhash to repackage 512 bit
|
||||
// hash to 256 bit final hash.
|
||||
int x16r_8way_hash( void* output, const void* input, const int thrid )
|
||||
int x16r_8way_hash( void* output, const void* input, int thrid )
|
||||
{
|
||||
uint8_t hash[64*8] __attribute__ ((aligned (128)));
|
||||
if ( !x16r_8way_hash_generic( hash, input, thrid ) )
|
||||
@@ -502,6 +495,7 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[16*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t bedata1[2];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
@@ -514,16 +508,27 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
memcpy( vdata, x16r_8way_vdata, sizeof vdata );
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
const uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
x16r_8way_prehash( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
||||
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
if( algo_gate.hash( hash, vdata, thr_id ) );
|
||||
if( x16r_8way_hash( hash, vdata, thr_id ) );
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
@@ -541,7 +546,7 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
#elif defined (X16R_4WAY)
|
||||
|
||||
void x16r_4way_do_prehash( void *vdata, const void *pdata )
|
||||
void x16r_4way_prehash( void *vdata, void *pdata )
|
||||
{
|
||||
uint32_t vdata2[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
@@ -622,14 +627,7 @@ void x16r_4way_do_prehash( void *vdata, const void *pdata )
|
||||
}
|
||||
}
|
||||
|
||||
int x16r_4way_prehash( struct work *work )
|
||||
{
|
||||
x16r_gate_get_hash_order( work, x16r_hash_order );
|
||||
x16r_4way_do_prehash( x16r_4way_vdata, work->data );
|
||||
return 1;
|
||||
}
|
||||
|
||||
int x16r_4way_hash_generic( void* output, const void* input, const int thrid )
|
||||
int x16r_4way_hash_generic( void* output, const void* input, int thrid )
|
||||
{
|
||||
uint32_t vhash[20*4] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[20] __attribute__ ((aligned (32)));
|
||||
@@ -637,14 +635,13 @@ int x16r_4way_hash_generic( void* output, const void* input, const int thrid )
|
||||
uint32_t hash2[20] __attribute__ ((aligned (32)));
|
||||
uint32_t hash3[20] __attribute__ ((aligned (32)));
|
||||
x16r_4way_context_overlay ctx;
|
||||
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
||||
void *in0 = (void*) hash0;
|
||||
void *in1 = (void*) hash1;
|
||||
void *in2 = (void*) hash2;
|
||||
void *in3 = (void*) hash3;
|
||||
int size = 80;
|
||||
|
||||
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
|
||||
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, input, 640 );
|
||||
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
@@ -908,7 +905,7 @@ int x16r_4way_hash_generic( void* output, const void* input, const int thrid )
|
||||
return 1;
|
||||
}
|
||||
|
||||
int x16r_4way_hash( void* output, const void* input, const int thrid )
|
||||
int x16r_4way_hash( void* output, const void* input, int thrid )
|
||||
{
|
||||
uint8_t hash[64*4] __attribute__ ((aligned (64)));
|
||||
if ( !x16r_4way_hash_generic( hash, input, thrid ) )
|
||||
@@ -927,6 +924,7 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t bedata1[2];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
@@ -939,15 +937,25 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
memcpy( vdata, x16r_4way_vdata, sizeof vdata );
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
const uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "Hash order %s Ntime %08x", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
x16r_4way_prehash( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
if ( algo_gate.hash( hash, vdata, thr_id ) );
|
||||
if ( x16r_4way_hash( hash, vdata, thr_id ) );
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
|
@@ -1,44 +1,26 @@
|
||||
#include "x16r-gate.h"
|
||||
#include "algo/sha/sha256d.h"
|
||||
|
||||
char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ] = {0};
|
||||
__thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ] = { 0 };
|
||||
|
||||
void (*x16r_gate_get_hash_order) ( const struct work *, char * ) = NULL;
|
||||
void (*x16_r_s_getAlgoString) ( const uint8_t*, char* ) = NULL;
|
||||
|
||||
#if defined (X16R_8WAY)
|
||||
|
||||
x16r_8way_context_overlay x16r_ctx;
|
||||
uint32_t x16r_8way_vdata[24*8] __attribute__ ((aligned (64)));
|
||||
__thread x16r_8way_context_overlay x16r_ctx;
|
||||
|
||||
#elif defined (X16R_4WAY)
|
||||
|
||||
x16r_4way_context_overlay x16r_ctx;
|
||||
uint32_t x16r_4way_vdata[24*4] __attribute__ ((aligned (64)));
|
||||
|
||||
__thread x16r_4way_context_overlay x16r_ctx;
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (X16RV2_8WAY)
|
||||
__thread x16r_context_overlay x16_ctx;
|
||||
|
||||
x16rv2_8way_context_overlay x16rv2_ctx;
|
||||
|
||||
#elif defined (X16RV2_4WAY)
|
||||
|
||||
x16rv2_4way_context_overlay x16rv2_ctx;
|
||||
|
||||
#endif
|
||||
|
||||
x16r_context_overlay x16_ctx;
|
||||
uint32_t x16r_edata[24] __attribute__ ((aligned (32)));
|
||||
|
||||
void x16r_get_hash_order( const struct work *work, char *hash_order )
|
||||
void x16r_getAlgoString( const uint8_t* prevblock, char *output )
|
||||
{
|
||||
char *sptr = hash_order;
|
||||
const uint32_t *pdata = work->data;
|
||||
uint8_t prevblock[16];
|
||||
((uint32_t*)prevblock)[0] = bswap_32( pdata[1] );
|
||||
((uint32_t*)prevblock)[1] = bswap_32( pdata[2] );
|
||||
|
||||
char *sptr = output;
|
||||
for ( int j = 0; j < X16R_HASH_FUNC_COUNT; j++ )
|
||||
{
|
||||
uint8_t b = (15 - j) >> 1; // 16 first ascii hex chars (lsb in uint256)
|
||||
@@ -50,51 +32,38 @@ void x16r_get_hash_order( const struct work *work, char *hash_order )
|
||||
sptr++;
|
||||
}
|
||||
*sptr = '\0';
|
||||
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_INFO, "Hash order %s", x16r_hash_order );
|
||||
}
|
||||
|
||||
void x16s_get_hash_order( const struct work *work, char *hash_order )
|
||||
|
||||
void x16s_getAlgoString( const uint8_t* prevblock, char *output )
|
||||
{
|
||||
const uint32_t *pdata = work->data;
|
||||
uint8_t prevblock[16];
|
||||
((uint32_t*)prevblock)[0] = bswap_32( pdata[1] );
|
||||
((uint32_t*)prevblock)[1] = bswap_32( pdata[2] );
|
||||
strcpy( hash_order, "0123456789ABCDEF" );
|
||||
strcpy( output, "0123456789ABCDEF" );
|
||||
for ( int i = 0; i < 16; i++ )
|
||||
{
|
||||
uint8_t b = (15 - i) >> 1; // 16 ascii hex chars, reversed
|
||||
uint8_t algoDigit = (i & 1) ? prevblock[b] & 0xF : prevblock[b] >> 4;
|
||||
int offset = algoDigit;
|
||||
// insert the nth character at the front
|
||||
char oldVal = hash_order[ offset ];
|
||||
char oldVal = output[offset];
|
||||
for( int j = offset; j-- > 0; )
|
||||
hash_order[ j+1 ] = hash_order[ j ];
|
||||
hash_order[ 0 ] = oldVal;
|
||||
output[j+1] = output[j];
|
||||
output[0] = oldVal;
|
||||
}
|
||||
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_INFO, "Hash order %s", x16r_hash_order );
|
||||
}
|
||||
|
||||
bool register_x16r_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_8way;
|
||||
gate->prehash = (void*)&x16r_8way_prehash;
|
||||
gate->hash = (void*)&x16r_8way_hash;
|
||||
#elif defined (X16R_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_4way;
|
||||
gate->prehash = (void*)&x16r_4way_prehash;
|
||||
gate->hash = (void*)&x16r_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x16r;
|
||||
gate->prehash = (void*)&x16r_prehash;
|
||||
gate->hash = (void*)&x16r_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
x16r_gate_get_hash_order = (void*)&x16r_get_hash_order;
|
||||
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
@@ -102,20 +71,17 @@ bool register_x16r_algo( algo_gate_t* gate )
|
||||
bool register_x16rv2_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16RV2_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_8way;
|
||||
gate->prehash = (void*)&x16rv2_8way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x16rv2_8way;
|
||||
gate->hash = (void*)&x16rv2_8way_hash;
|
||||
#elif defined (X16RV2_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_4way;
|
||||
gate->prehash = (void*)&x16rv2_4way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x16rv2_4way;
|
||||
gate->hash = (void*)&x16rv2_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x16r;
|
||||
gate->prehash = (void*)&x16rv2_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x16rv2;
|
||||
gate->hash = (void*)&x16rv2_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
x16r_gate_get_hash_order = (void*)&x16r_get_hash_order;
|
||||
x16_r_s_getAlgoString = (void*)&x16r_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
@@ -124,19 +90,16 @@ bool register_x16s_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_8way;
|
||||
gate->prehash = (void*)&x16r_8way_prehash;
|
||||
gate->hash = (void*)&x16r_8way_hash;
|
||||
#elif defined (X16R_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_4way;
|
||||
gate->prehash = (void*)&x16r_4way_prehash;
|
||||
gate->hash = (void*)&x16r_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x16r;
|
||||
gate->prehash = (void*)&x16r_prehash;
|
||||
gate->hash = (void*)&x16r_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
x16r_gate_get_hash_order = (void*)&x16s_get_hash_order;
|
||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
@@ -145,33 +108,30 @@ bool register_x16s_algo( algo_gate_t* gate )
|
||||
//
|
||||
// X16RT
|
||||
|
||||
void x16rt_get_hash_order( const struct work * work, char * hash_order )
|
||||
{
|
||||
uint32_t _ALIGN(64) timehash[8*8];
|
||||
const uint32_t ntime = bswap_32( work->data[17] );
|
||||
const int32_t masked_ntime = ntime & 0xffffff80;
|
||||
uint8_t* data = (uint8_t*)timehash;
|
||||
char *sptr = hash_order;
|
||||
|
||||
sha256d( (unsigned char*)timehash, (const unsigned char*)( &masked_ntime ),
|
||||
sizeof( masked_ntime ) );
|
||||
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash )
|
||||
{
|
||||
int32_t maskedTime = timeStamp & 0xffffff80;
|
||||
sha256d( (unsigned char*)timeHash, (const unsigned char*)( &maskedTime ),
|
||||
sizeof( maskedTime ) );
|
||||
}
|
||||
|
||||
for ( uint8_t j = 0; j < X16R_HASH_FUNC_COUNT; j++ )
|
||||
{
|
||||
void x16rt_getAlgoString( const uint32_t *timeHash, char *output)
|
||||
{
|
||||
char *sptr = output;
|
||||
uint8_t* data = (uint8_t*)timeHash;
|
||||
|
||||
for (uint8_t j = 0; j < X16R_HASH_FUNC_COUNT; j++) {
|
||||
uint8_t b = (15 - j) >> 1; // 16 ascii hex chars, reversed
|
||||
uint8_t algoDigit = (j & 1) ? data[b] & 0xF : data[b] >> 4;
|
||||
|
||||
if ( algoDigit >= 10 )
|
||||
sprintf( sptr, "%c", 'A' + (algoDigit - 10) );
|
||||
if (algoDigit >= 10)
|
||||
sprintf(sptr, "%c", 'A' + (algoDigit - 10));
|
||||
else
|
||||
sprintf( sptr, "%u", (uint32_t) algoDigit );
|
||||
sprintf(sptr, "%u", (uint32_t) algoDigit);
|
||||
sptr++;
|
||||
}
|
||||
*sptr = '\0';
|
||||
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_INFO, "Hash order %s, ntime %08x, time hash %08x",
|
||||
hash_order, ntime, timehash );
|
||||
}
|
||||
|
||||
void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
@@ -262,19 +222,15 @@ void veil_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
|
||||
bool register_x16rt_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_8way;
|
||||
gate->prehash = (void*)&x16r_8way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x16rt_8way;
|
||||
gate->hash = (void*)&x16r_8way_hash;
|
||||
#elif defined (X16R_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_4way;
|
||||
gate->prehash = (void*)&x16r_4way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
||||
gate->hash = (void*)&x16r_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x16r;
|
||||
gate->prehash = (void*)&x16r_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x16rt;
|
||||
gate->hash = (void*)&x16r_hash;
|
||||
#endif
|
||||
x16r_gate_get_hash_order = (void*)&x16rt_get_hash_order;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
@@ -283,20 +239,16 @@ bool register_x16rt_algo( algo_gate_t* gate )
|
||||
bool register_x16rt_veil_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_8way;
|
||||
gate->prehash = (void*)&x16r_8way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x16rt_8way;
|
||||
gate->hash = (void*)&x16r_8way_hash;
|
||||
#elif defined (X16R_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_4way;
|
||||
gate->prehash = (void*)&x16r_4way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x16rt_4way;
|
||||
gate->hash = (void*)&x16r_4way_hash;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x16r;
|
||||
gate->prehash = (void*)&x16r_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x16rt;
|
||||
gate->hash = (void*)&x16r_hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
x16r_gate_get_hash_order = (void*)&x16rt_get_hash_order;
|
||||
gate->build_extraheader = (void*)&veil_build_extraheader;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
@@ -323,23 +275,20 @@ bool register_hex_algo( algo_gate_t* gate )
|
||||
bool register_x21s_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined (X16R_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_8way;
|
||||
gate->prehash = (void*)&x16r_8way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x21s_8way;
|
||||
gate->hash = (void*)&x21s_8way_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_8way_thread_init;
|
||||
#elif defined (X16R_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_x16r_4way;
|
||||
gate->prehash = (void*)&x16r_4way_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x21s_4way;
|
||||
gate->hash = (void*)&x21s_4way_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_4way_thread_init;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_x16r;
|
||||
gate->prehash = (void*)&x16r_prehash;
|
||||
gate->scanhash = (void*)&scanhash_x21s;
|
||||
gate->hash = (void*)&x21s_hash;
|
||||
gate->miner_thread_init = (void*)&x21s_thread_init;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
|
||||
x16r_gate_get_hash_order = (void*)&x16s_get_hash_order;
|
||||
x16_r_s_getAlgoString = (void*)&x16s_getAlgoString;
|
||||
opt_target_factor = 256.0;
|
||||
return true;
|
||||
};
|
||||
|
@@ -21,7 +21,6 @@
|
||||
#include "algo/shabal/sph_shabal.h"
|
||||
#include "algo/whirlpool/sph_whirlpool.h"
|
||||
#include "algo/sha/sph_sha2.h"
|
||||
#include "algo/tiger/sph_tiger.h"
|
||||
|
||||
#if defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
@@ -58,11 +57,13 @@
|
||||
|
||||
#define X16R_8WAY 1
|
||||
#define X16RV2_8WAY 1
|
||||
#define X16RT_8WAY 1
|
||||
#define X21S_8WAY 1
|
||||
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
|
||||
#define X16RV2_4WAY 1
|
||||
#define X16RT_4WAY 1
|
||||
#define X21S_4WAY 1
|
||||
#define X16R_4WAY 1
|
||||
|
||||
@@ -88,29 +89,23 @@ enum x16r_Algo {
|
||||
X16R_HASH_FUNC_COUNT
|
||||
};
|
||||
|
||||
extern __thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ];
|
||||
|
||||
//extern __thread char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ];
|
||||
extern char x16r_hash_order[ X16R_HASH_FUNC_COUNT + 1 ];
|
||||
|
||||
|
||||
extern void (*x16r_gate_get_hash_order) ( const struct work *, char * );
|
||||
|
||||
// x16r, x16rv2
|
||||
void x16r_get_hash_order( const struct work *, char * );
|
||||
// x16s, x21s
|
||||
void x16s_get_hash_order( const struct work *, char * );
|
||||
// x16rt
|
||||
void x16rt_get_hash_order( const struct work *, char * );
|
||||
extern void (*x16_r_s_getAlgoString) ( const uint8_t*, char* );
|
||||
void x16r_getAlgoString( const uint8_t *prevblock, char *output );
|
||||
void x16s_getAlgoString( const uint8_t *prevblock, char *output );
|
||||
void x16rt_getAlgoString( const uint32_t *timeHash, char *output );
|
||||
|
||||
void x16rt_getTimeHash( const uint32_t timeStamp, void* timeHash );
|
||||
|
||||
bool register_x16r_algo( algo_gate_t* gate );
|
||||
bool register_x16rv2_algo( algo_gate_t* gate );
|
||||
bool register_x16s_algo( algo_gate_t* gate );
|
||||
bool register_x16rt_algo( algo_gate_t* gate );
|
||||
bool register_hex_algo( algo_gate_t* gate );
|
||||
bool register_x21s_algo( algo_gate_t* gate );
|
||||
bool register_hex__algo( algo_gate_t* gate );
|
||||
bool register_x21s__algo( algo_gate_t* gate );
|
||||
|
||||
// x16r, x16s, x16rt
|
||||
// x16r, x16s
|
||||
#if defined(X16R_8WAY)
|
||||
|
||||
union _x16r_8way_context_overlay
|
||||
@@ -141,15 +136,15 @@ union _x16r_8way_context_overlay
|
||||
|
||||
typedef union _x16r_8way_context_overlay x16r_8way_context_overlay;
|
||||
|
||||
extern x16r_8way_context_overlay x16r_ctx;
|
||||
extern uint32_t x16r_8way_vdata[24*8] __attribute__ ((aligned (64)));
|
||||
extern __thread x16r_8way_context_overlay x16r_ctx;
|
||||
|
||||
void x16r_8way_do_prehash( void *, const void * );
|
||||
int x16r_8way_prehash( struct work * );
|
||||
int x16r_8way_hash_generic( void *, const void *, const int );
|
||||
int x16r_8way_hash( void *, const void *, const int );
|
||||
void x16r_8way_prehash( void *, void * );
|
||||
int x16r_8way_hash_generic( void *, const void *, int );
|
||||
int x16r_8way_hash( void *, const void *, int );
|
||||
int scanhash_x16r_8way( struct work *, uint32_t ,
|
||||
uint64_t *, struct thr_info * );
|
||||
extern __thread x16r_8way_context_overlay x16r_ctx;
|
||||
|
||||
|
||||
#elif defined(X16R_4WAY)
|
||||
|
||||
@@ -182,15 +177,14 @@ union _x16r_4way_context_overlay
|
||||
|
||||
typedef union _x16r_4way_context_overlay x16r_4way_context_overlay;
|
||||
|
||||
extern x16r_4way_context_overlay x16r_ctx;
|
||||
extern uint32_t x16r_4way_vdata[24*4] __attribute__ ((aligned (64)));
|
||||
extern __thread x16r_4way_context_overlay x16r_ctx;
|
||||
|
||||
void x16r_4way_do_prehash( void *, const void * );
|
||||
int x16r_4way_prehash( struct work * );
|
||||
int x16r_4way_hash_generic( void *, const void *, const int );
|
||||
int x16r_4way_hash( void *, const void *, const int );
|
||||
void x16r_4way_prehash( void *, void * );
|
||||
int x16r_4way_hash_generic( void *, const void *, int );
|
||||
int x16r_4way_hash( void *, const void *, int );
|
||||
int scanhash_x16r_4way( struct work *, uint32_t,
|
||||
uint64_t *, struct thr_info * );
|
||||
extern __thread x16r_4way_context_overlay x16r_ctx;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -223,113 +217,80 @@ union _x16r_context_overlay
|
||||
|
||||
typedef union _x16r_context_overlay x16r_context_overlay;
|
||||
|
||||
extern x16r_context_overlay x16_ctx;
|
||||
extern uint32_t x16r_edata[24] __attribute__ ((aligned (32)));
|
||||
extern __thread x16r_context_overlay x16_ctx;
|
||||
|
||||
void x16r_do_prehash( const void * );
|
||||
int x16r_prehash( const struct work * );
|
||||
int x16r_hash_generic( void *, const void *, const int );
|
||||
int x16r_hash( void *, const void *, const int );
|
||||
void x16r_prehash( void *, void * );
|
||||
int x16r_hash_generic( void *, const void *, int );
|
||||
int x16r_hash( void *, const void *, int );
|
||||
int scanhash_x16r( struct work *, uint32_t, uint64_t *, struct thr_info * );
|
||||
|
||||
// x16Rv2
|
||||
#if defined(X16RV2_8WAY)
|
||||
|
||||
union _x16rv2_8way_context_overlay
|
||||
{
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cubehashParam cube;
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_8way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
sph_tiger_context tiger;
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
shavite512_4way_context shavite;
|
||||
echo_4way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
shavite512_context shavite;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
} __attribute__ ((aligned (64)));
|
||||
|
||||
typedef union _x16rv2_8way_context_overlay x16rv2_8way_context_overlay;
|
||||
extern x16rv2_8way_context_overlay x16rv2_ctx;
|
||||
|
||||
int x16rv2_8way_prehash( struct work * );
|
||||
int x16rv2_8way_hash( void *state, const void *input, const int thrid );
|
||||
//int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
// uint64_t *hashes_done, struct thr_info *mythr );
|
||||
int x16rv2_8way_hash( void *state, const void *input, int thrid );
|
||||
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(X16RV2_4WAY)
|
||||
|
||||
union _x16rv2_4way_context_overlay
|
||||
{
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
shavite512_2way_context shavite;
|
||||
echo_2way_context echo;
|
||||
int x16rv2_4way_hash( void *state, const void *input, int thrid );
|
||||
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
shavite512_context shavite;
|
||||
hashState_echo echo;
|
||||
|
||||
int x16rv2_hash( void *state, const void *input, int thr_id );
|
||||
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
sph_tiger_context tiger;
|
||||
};
|
||||
|
||||
typedef union _x16rv2_4way_context_overlay x16rv2_4way_context_overlay;
|
||||
extern x16rv2_4way_context_overlay x16rv2_ctx;
|
||||
// x16rt, veil
|
||||
#if defined(X16R_8WAY)
|
||||
|
||||
int x16rv2_4way_hash( void *state, const void *input, const int thrid );
|
||||
int x16rv2_4way_prehash( struct work * );
|
||||
//void x16rt_8way_hash( void *state, const void *input );
|
||||
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(X16R_4WAY)
|
||||
|
||||
//void x16rt_4way_hash( void *state, const void *input );
|
||||
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
int x16rv2_hash( void *state, const void *input, const int thr_id );
|
||||
int x16rv2_prehash( const struct work * );
|
||||
//void x16rt_hash( void *state, const void *input );
|
||||
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
// x21s
|
||||
#if defined(X16R_8WAY)
|
||||
|
||||
int x21s_8way_hash( void *state, const void *input, const int thrid );
|
||||
int x21s_8way_hash( void *state, const void *input, int thrid );
|
||||
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool x21s_8way_thread_init();
|
||||
|
||||
#elif defined(X16R_4WAY)
|
||||
|
||||
int x21s_4way_hash( void *state, const void *input, const int thrid );
|
||||
int x21s_4way_hash( void *state, const void *input, int thrid );
|
||||
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool x21s_4way_thread_init();
|
||||
|
||||
#else
|
||||
|
||||
int x21s_hash( void *state, const void *input, const int thr_id );
|
||||
int x21s_hash( void *state, const void *input, int thr_id );
|
||||
int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool x21s_thread_init();
|
||||
|
||||
#endif
|
||||
|
||||
//void hex_hash( void *state, const void *input );
|
||||
int scanhash_hex( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
|
@@ -10,7 +10,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
void x16r_do_prehash( const void *edata )
|
||||
void x16r_prehash( void *edata, void *pdata )
|
||||
{
|
||||
const char elem = x16r_hash_order[0];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
@@ -48,7 +48,7 @@ void x16r_do_prehash( const void *edata )
|
||||
}
|
||||
}
|
||||
|
||||
int x16r_hash_generic( void* output, const void* input, const int thrid )
|
||||
int x16r_hash_generic( void* output, const void* input, int thrid )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[16];
|
||||
x16r_context_overlay ctx;
|
||||
@@ -192,15 +192,7 @@ int x16r_hash_generic( void* output, const void* input, const int thrid )
|
||||
return true;
|
||||
}
|
||||
|
||||
int x16r_prehash( const struct work *work )
|
||||
{
|
||||
mm128_bswap32_80( x16r_edata, work->data );
|
||||
x16r_gate_get_hash_order( work, x16r_hash_order );
|
||||
x16r_do_prehash( x16r_edata );
|
||||
return 1;
|
||||
}
|
||||
|
||||
int x16r_hash( void* output, const void* input, const int thrid )
|
||||
int x16r_hash( void* output, const void* input, int thrid )
|
||||
{
|
||||
uint8_t hash[64] __attribute__ ((aligned (64)));
|
||||
if ( !x16r_hash_generic( hash, input, thrid ) )
|
||||
@@ -213,8 +205,8 @@ int x16r_hash( void* output, const void* input, const int thrid )
|
||||
int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(32) hash32[8];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
@@ -224,14 +216,24 @@ int scanhash_x16r( struct work *work, uint32_t max_nonce,
|
||||
const bool bench = opt_benchmark;
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
pthread_rwlock_rdlock( &g_work_lock );
|
||||
memcpy( edata, x16r_edata, sizeof edata );
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
if ( s_ntime != pdata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*)(&edata[1]), x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
x16r_prehash( edata, pdata );
|
||||
|
||||
do
|
||||
{
|
||||
edata[19] = nonce;
|
||||
if ( algo_gate.hash( hash32, edata, thr_id ) )
|
||||
if ( x16r_hash( hash32, edata, thr_id ) )
|
||||
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( nonce );
|
||||
|
113
algo/x16/x16rt-4way.c
Normal file
113
algo/x16/x16rt-4way.c
Normal file
@@ -0,0 +1,113 @@
|
||||
#include "x16r-gate.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined (X16R_8WAY)
|
||||
|
||||
int scanhash_x16rt_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[16*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) timeHash[8*8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
|
||||
if ( s_ntime != masked_ntime )
|
||||
{
|
||||
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||
s_ntime = masked_ntime;
|
||||
if ( !thr_id )
|
||||
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x",
|
||||
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
|
||||
}
|
||||
|
||||
x16r_8way_prehash( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
||||
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
if ( x16r_8way_hash( hash, vdata, thr_id ) )
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n+i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined (X16R_4WAY)
|
||||
|
||||
int scanhash_x16rt_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) timeHash[4*8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
uint32_t masked_ntime = bswap_32( pdata[17] ) & 0xffffff80;
|
||||
if ( s_ntime != masked_ntime )
|
||||
{
|
||||
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||
s_ntime = masked_ntime;
|
||||
if ( !thr_id )
|
||||
applog( LOG_INFO, "Hash order %s, Ntime %08x, time hash %08x",
|
||||
x16r_hash_order, bswap_32( pdata[17] ), timeHash );
|
||||
}
|
||||
|
||||
x16r_4way_prehash( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
if ( x16r_4way_hash( hash, vdata, thr_id ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n+i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( ( n < last_nonce ) && !(*restart) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
53
algo/x16/x16rt.c
Normal file
53
algo/x16/x16rt.c
Normal file
@@ -0,0 +1,53 @@
|
||||
#include "x16r-gate.h"
|
||||
|
||||
#if !defined(X16R_8WAY) && !defined(X16R_4WAY)
|
||||
|
||||
int scanhash_x16rt( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) edata[20];
|
||||
uint32_t _ALIGN(64) timeHash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const int thr_id = mythr->id;
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
uint32_t masked_ntime = swab32( pdata[17] ) & 0xffffff80;
|
||||
if ( s_ntime != masked_ntime )
|
||||
{
|
||||
x16rt_getTimeHash( masked_ntime, &timeHash );
|
||||
x16rt_getAlgoString( &timeHash[0], x16r_hash_order );
|
||||
s_ntime = masked_ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order: %s time: (%08x) time hash: (%08x)",
|
||||
x16r_hash_order, swab32( pdata[17] ), timeHash );
|
||||
}
|
||||
|
||||
x16r_prehash( edata, pdata );
|
||||
|
||||
do
|
||||
{
|
||||
edata[19] = nonce;
|
||||
if ( x16r_hash( hash32, edata, thr_id ) )
|
||||
if ( valid_hash( hash32, ptarget ) && !bench )
|
||||
{
|
||||
pdata[19] = bswap_32( nonce );
|
||||
submit_solution( work, hash32, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // !defined(X16R_8WAY) && !defined(X16R_4WAY)
|
||||
|
@@ -12,73 +12,37 @@
|
||||
|
||||
#if defined (X16RV2_8WAY)
|
||||
|
||||
void x16rv2_8way_do_prehash( void *vdata, void *pdata )
|
||||
union _x16rv2_8way_context_overlay
|
||||
{
|
||||
uint32_t vdata32[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
blake512_8way_context blake;
|
||||
bmw512_8way_context bmw;
|
||||
skein512_8way_context skein;
|
||||
jh512_8way_context jh;
|
||||
keccak512_8way_context keccak;
|
||||
luffa_4way_context luffa;
|
||||
cubehashParam cube;
|
||||
simd_4way_context simd;
|
||||
hamsi512_8way_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_8way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_8way_context sha512;
|
||||
sph_tiger_context tiger;
|
||||
#if defined(__VAES__)
|
||||
groestl512_4way_context groestl;
|
||||
shavite512_4way_context shavite;
|
||||
echo_4way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
shavite512_context shavite;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
} __attribute__ ((aligned (64)));
|
||||
|
||||
const char elem = x16r_hash_order[0];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
typedef union _x16rv2_8way_context_overlay x16rv2_8way_context_overlay;
|
||||
static __thread x16rv2_8way_context_overlay x16rv2_ctx;
|
||||
|
||||
switch ( algo )
|
||||
{
|
||||
case JH:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
jh512_8way_init( &x16rv2_ctx.jh );
|
||||
jh512_8way_update( &x16rv2_ctx.jh, vdata, 64 );
|
||||
break;
|
||||
case KECCAK:
|
||||
case LUFFA:
|
||||
case SHA_512:
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
sph_tiger_init( &x16rv2_ctx.tiger );
|
||||
sph_tiger( &x16rv2_ctx.tiger, edata, 64 );
|
||||
intrlv_8x64( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
break;
|
||||
case SKEIN:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
skein512_8way_init( &x16rv2_ctx.skein );
|
||||
skein512_8way_update( &x16rv2_ctx.skein, vdata, 64 );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
cubehashInit( &x16rv2_ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdate( &x16rv2_ctx.cube, (const byte*)edata, 64 );
|
||||
intrlv_8x64( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
break;
|
||||
case HAMSI:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
hamsi512_8way_init( &x16rv2_ctx.hamsi );
|
||||
hamsi512_8way_update( &x16rv2_ctx.hamsi, vdata, 64 );
|
||||
break;
|
||||
case SHABAL:
|
||||
mm256_bswap32_intrlv80_8x32( vdata32, pdata );
|
||||
shabal512_8way_init( &x16rv2_ctx.shabal );
|
||||
shabal512_8way_update( &x16rv2_ctx.shabal, vdata32, 64 );
|
||||
rintrlv_8x32_8x64( vdata, vdata32, 640 );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
sph_whirlpool_init( &x16rv2_ctx.whirlpool );
|
||||
sph_whirlpool( &x16rv2_ctx.whirlpool, edata, 64 );
|
||||
intrlv_8x64( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
break;
|
||||
default:
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
}
|
||||
}
|
||||
|
||||
int x16rv2_8way_prehash( struct work *work )
|
||||
{
|
||||
x16r_gate_get_hash_order( work, x16r_hash_order );
|
||||
x16rv2_8way_do_prehash( x16r_8way_vdata, work->data );
|
||||
return 1;
|
||||
}
|
||||
|
||||
int x16rv2_8way_hash( void* output, const void* input, const int thrid )
|
||||
int x16rv2_8way_hash( void* output, const void* input, int thrid )
|
||||
{
|
||||
uint32_t vhash[24*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash0[24] __attribute__ ((aligned (32)));
|
||||
@@ -593,28 +557,50 @@ int x16rv2_8way_hash( void* output, const void* input, const int thrid )
|
||||
return 1;
|
||||
}
|
||||
|
||||
#elif defined (X16RV2_4WAY)
|
||||
|
||||
// Pad the 24 bytes tiger hash to 64 bytes
|
||||
inline void padtiger512( uint32_t* hash )
|
||||
int scanhash_x16rv2_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
for ( int i = 6; i < 16; i++ ) hash[i] = 0;
|
||||
}
|
||||
|
||||
void x16rv2_4way_do_prehash( void *vdata, void *pdata )
|
||||
{
|
||||
uint32_t vdata32[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[16*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata2[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
uint32_t n = first_nonce;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
const int thr_id = mythr->id;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
const uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||
const char elem = x16r_hash_order[0];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
|
||||
switch ( algo )
|
||||
{
|
||||
case JH:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
jh512_4way_init( &x16rv2_ctx.jh );
|
||||
jh512_4way_update( &x16rv2_ctx.jh, vdata, 64 );
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
jh512_8way_init( &x16rv2_ctx.jh );
|
||||
jh512_8way_update( &x16rv2_ctx.jh, vdata, 64 );
|
||||
break;
|
||||
case KECCAK:
|
||||
case LUFFA:
|
||||
@@ -622,45 +608,100 @@ void x16rv2_4way_do_prehash( void *vdata, void *pdata )
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
sph_tiger_init( &x16rv2_ctx.tiger );
|
||||
sph_tiger( &x16rv2_ctx.tiger, edata, 64 );
|
||||
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
intrlv_8x64( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
break;
|
||||
case SKEIN:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
skein512_8way_init( &x16rv2_ctx.skein );
|
||||
skein512_8way_update( &x16rv2_ctx.skein, vdata, 64 );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
cubehashInit( &x16rv2_ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdate( &x16rv2_ctx.cube, (const byte*)edata, 64 );
|
||||
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
intrlv_8x64( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
break;
|
||||
case HAMSI:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
hamsi512_4way_init( &x16rv2_ctx.hamsi );
|
||||
hamsi512_4way_update( &x16rv2_ctx.hamsi, vdata, 64 );
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
hamsi512_8way_init( &x16rv2_ctx.hamsi );
|
||||
hamsi512_8way_update( &x16rv2_ctx.hamsi, vdata, 64 );
|
||||
break;
|
||||
case SHABAL:
|
||||
mm128_bswap32_intrlv80_4x32( vdata32, pdata );
|
||||
shabal512_4way_init( &x16rv2_ctx.shabal );
|
||||
shabal512_4way_update( &x16rv2_ctx.shabal, vdata32, 64 );
|
||||
rintrlv_4x32_4x64( vdata, vdata32, 640 );
|
||||
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
|
||||
shabal512_8way_init( &x16rv2_ctx.shabal );
|
||||
shabal512_8way_update( &x16rv2_ctx.shabal, vdata2, 64 );
|
||||
rintrlv_8x32_8x64( vdata, vdata2, 640 );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
sph_whirlpool_init( &x16rv2_ctx.whirlpool );
|
||||
sph_whirlpool( &x16rv2_ctx.whirlpool, edata, 64 );
|
||||
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
intrlv_8x64( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
break;
|
||||
default:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
}
|
||||
}
|
||||
|
||||
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
||||
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
if ( x16rv2_8way_hash( hash, vdata, thr_id ) )
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n+i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int x16rv2_4way_prehash( struct work *work )
|
||||
#elif defined (X16RV2_4WAY)
|
||||
|
||||
union _x16rv2_4way_context_overlay
|
||||
{
|
||||
x16r_gate_get_hash_order( work, x16r_hash_order );
|
||||
x16rv2_4way_do_prehash( x16r_4way_vdata, work->data );
|
||||
return 1;
|
||||
blake512_4way_context blake;
|
||||
bmw512_4way_context bmw;
|
||||
#if defined(__VAES__)
|
||||
groestl512_2way_context groestl;
|
||||
shavite512_2way_context shavite;
|
||||
echo_2way_context echo;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
shavite512_context shavite;
|
||||
hashState_echo echo;
|
||||
#endif
|
||||
skein512_4way_context skein;
|
||||
jh512_4way_context jh;
|
||||
keccak512_4way_context keccak;
|
||||
luffa_2way_context luffa;
|
||||
cubehashParam cube;
|
||||
simd_2way_context simd;
|
||||
hamsi512_4way_context hamsi;
|
||||
hashState_fugue fugue;
|
||||
shabal512_4way_context shabal;
|
||||
sph_whirlpool_context whirlpool;
|
||||
sha512_4way_context sha512;
|
||||
sph_tiger_context tiger;
|
||||
};
|
||||
typedef union _x16rv2_4way_context_overlay x16rv2_4way_context_overlay;
|
||||
|
||||
static __thread x16rv2_4way_context_overlay x16rv2_ctx;
|
||||
|
||||
// Pad the 24 bytes tiger hash to 64 bytes
|
||||
inline void padtiger512( uint32_t* hash )
|
||||
{
|
||||
for ( int i = 6; i < 16; i++ ) hash[i] = 0;
|
||||
}
|
||||
|
||||
int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
@@ -1007,4 +1048,107 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
|
||||
return 1;
|
||||
}
|
||||
|
||||
int scanhash_x16rv2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata32[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t edata[20];
|
||||
uint32_t bedata1[2];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
__m256i *noncev = (__m256i*)vdata + 9;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0fff;
|
||||
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
const uint32_t ntime = bswap_32(pdata[17]);
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
// Do midstate prehash on hash functions with block size <= 64 bytes.
|
||||
const char elem = x16r_hash_order[0];
|
||||
const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
|
||||
switch ( algo )
|
||||
{
|
||||
case JH:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
jh512_4way_init( &x16rv2_ctx.jh );
|
||||
jh512_4way_update( &x16rv2_ctx.jh, vdata, 64 );
|
||||
break;
|
||||
case KECCAK:
|
||||
case LUFFA:
|
||||
case SHA_512:
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
sph_tiger_init( &x16rv2_ctx.tiger );
|
||||
sph_tiger( &x16rv2_ctx.tiger, edata, 64 );
|
||||
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
break;
|
||||
case SKEIN:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
skein512_4way_prehash64( &x16r_ctx.skein, vdata );
|
||||
break;
|
||||
case CUBEHASH:
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
cubehashInit( &x16rv2_ctx.cube, 512, 16, 32 );
|
||||
cubehashUpdate( &x16rv2_ctx.cube, (const byte*)edata, 64 );
|
||||
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
break;
|
||||
case HAMSI:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
hamsi512_4way_init( &x16rv2_ctx.hamsi );
|
||||
hamsi512_4way_update( &x16rv2_ctx.hamsi, vdata, 64 );
|
||||
break;
|
||||
case SHABAL:
|
||||
mm128_bswap32_intrlv80_4x32( vdata32, pdata );
|
||||
shabal512_4way_init( &x16rv2_ctx.shabal );
|
||||
shabal512_4way_update( &x16rv2_ctx.shabal, vdata32, 64 );
|
||||
rintrlv_4x32_4x64( vdata, vdata32, 640 );
|
||||
break;
|
||||
case WHIRLPOOL:
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
sph_whirlpool_init( &x16rv2_ctx.whirlpool );
|
||||
sph_whirlpool( &x16rv2_ctx.whirlpool, edata, 64 );
|
||||
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
|
||||
break;
|
||||
default:
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
}
|
||||
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
|
||||
do
|
||||
{
|
||||
if ( x16rv2_4way_hash( hash, vdata, thr_id ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n+i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -43,16 +43,9 @@ inline void padtiger512(uint32_t* hash) {
|
||||
for (int i = (24/4); i < (64/4); i++) hash[i] = 0;
|
||||
}
|
||||
|
||||
// no prehash
|
||||
int x16rv2_prehash( const struct work *work )
|
||||
int x16rv2_hash( void* output, const void* input, int thrid )
|
||||
{
|
||||
x16r_gate_get_hash_order( work, x16r_hash_order );
|
||||
return 1;
|
||||
}
|
||||
|
||||
int x16rv2_hash( void* output, const void* input, const int thrid )
|
||||
{
|
||||
uint32_t _ALIGN(32) hash[16];
|
||||
uint32_t _ALIGN(128) hash[16];
|
||||
x16rv2_context_overlay ctx;
|
||||
void *in = (void*) input;
|
||||
int size = 80;
|
||||
@@ -177,4 +170,52 @@ int x16rv2_hash( void* output, const void* input, const int thrid )
|
||||
return 1;
|
||||
}
|
||||
|
||||
int scanhash_x16rv2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const int thr_id = mythr->id;
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
if ( s_ntime != pdata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*) (&edata[1]), x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)",
|
||||
x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
do
|
||||
{
|
||||
edata[19] = nonce;
|
||||
if ( x16rv2_hash( hash32, edata, thr_id ) )
|
||||
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( nonce );
|
||||
submit_solution( work, hash32, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -30,7 +30,7 @@ union _x21s_8way_context_overlay
|
||||
|
||||
typedef union _x21s_8way_context_overlay x21s_8way_context_overlay;
|
||||
|
||||
int x21s_8way_hash( void* output, const void* input, const int thrid )
|
||||
int x21s_8way_hash( void* output, const void* input, int thrid )
|
||||
{
|
||||
uint32_t vhash[16*8] __attribute__ ((aligned (128)));
|
||||
uint8_t shash[64*8] __attribute__ ((aligned (64)));
|
||||
@@ -129,6 +129,66 @@ int x21s_8way_hash( void* output, const void* input, const int thrid )
|
||||
return 1;
|
||||
}
|
||||
|
||||
int scanhash_x21s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[16*8] __attribute__ ((aligned (128)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &hash[7<<3];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t last_nonce = max_nonce - 16;
|
||||
const int thr_id = mythr->id;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
x16r_8way_prehash( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32( _mm512_set_epi32(
|
||||
n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
if ( x21s_8way_hash( hash, vdata, thr_id ) )
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( unlikely( hash7[lane] <= Htarg ) )
|
||||
{
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_solution( work, lane_hash, mythr );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool x21s_8way_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
|
||||
@@ -155,7 +215,7 @@ union _x21s_4way_context_overlay
|
||||
|
||||
typedef union _x21s_4way_context_overlay x21s_4way_context_overlay;
|
||||
|
||||
int x21s_4way_hash( void* output, const void* input, const int thrid )
|
||||
int x21s_4way_hash( void* output, const void* input, int thrid )
|
||||
{
|
||||
uint32_t vhash[16*4] __attribute__ ((aligned (64)));
|
||||
uint8_t shash[64*4] __attribute__ ((aligned (64)));
|
||||
@@ -231,6 +291,58 @@ int x21s_4way_hash( void* output, const void* input, const int thrid )
|
||||
return 1;
|
||||
}
|
||||
|
||||
int scanhash_x21s_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t bedata1[2] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
bedata1[0] = bswap_32( pdata[1] );
|
||||
bedata1[1] = bswap_32( pdata[2] );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
uint32_t ntime = bswap_32( pdata[17] );
|
||||
if ( s_ntime != ntime )
|
||||
{
|
||||
x16_r_s_getAlgoString( (const uint8_t*)bedata1, x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_DEBUG, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
x16r_4way_prehash( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do
|
||||
{
|
||||
if ( x21s_4way_hash( hash, vdata, thr_id ) )
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n+i );
|
||||
submit_solution( work, hash+(i<<3), mythr );
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( likely( ( n < last_nonce ) && !(*restart) ) );
|
||||
pdata[19] = n;
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool x21s_4way_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
|
||||
|
@@ -27,7 +27,7 @@ union _x21s_context_overlay
|
||||
};
|
||||
typedef union _x21s_context_overlay x21s_context_overlay;
|
||||
|
||||
int x21s_hash( void* output, const void* input, const int thrid )
|
||||
int x21s_hash( void* output, const void* input, int thrid )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[16];
|
||||
x21s_context_overlay ctx;
|
||||
@@ -57,6 +57,50 @@ int x21s_hash( void* output, const void* input, const int thrid )
|
||||
return 1;
|
||||
}
|
||||
|
||||
int scanhash_x21s( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) edata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const int thr_id = mythr->id;
|
||||
uint32_t nonce = first_nonce;
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
const bool bench = opt_benchmark;
|
||||
if ( bench ) ptarget[7] = 0x0cff;
|
||||
|
||||
mm128_bswap32_80( edata, pdata );
|
||||
|
||||
static __thread uint32_t s_ntime = UINT32_MAX;
|
||||
if ( s_ntime != pdata[17] )
|
||||
{
|
||||
uint32_t ntime = swab32(pdata[17]);
|
||||
x16_r_s_getAlgoString( (const uint8_t*)(&edata[1]), x16r_hash_order );
|
||||
s_ntime = ntime;
|
||||
if ( opt_debug && !thr_id )
|
||||
applog( LOG_INFO, "hash order %s (%08x)", x16r_hash_order, ntime );
|
||||
}
|
||||
|
||||
x16r_prehash( edata, pdata );
|
||||
|
||||
do
|
||||
{
|
||||
edata[19] = nonce;
|
||||
if ( x21s_hash( hash32, edata, thr_id ) )
|
||||
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
|
||||
{
|
||||
pdata[19] = bswap_32( nonce );
|
||||
submit_solution( work, hash32, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !(*restart) );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool x21s_thread_init()
|
||||
{
|
||||
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4; // nCols
|
||||
|
@@ -257,6 +257,7 @@ int scanhash_x17_8way( struct work *work, uint32_t max_nonce,
|
||||
const __m512i eight = m512_const1_64( 8 );
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
// convert LE32 to LE64
|
||||
edata[0] = mm128_swap64_32( casti_m128i( pdata, 0 ) );
|
||||
edata[1] = mm128_swap64_32( casti_m128i( pdata, 1 ) );
|
||||
edata[2] = mm128_swap64_32( casti_m128i( pdata, 2 ) );
|
||||
@@ -470,6 +471,7 @@ int scanhash_x17_4way( struct work *work, uint32_t max_nonce,
|
||||
const __m256i four = m256_const1_64( 4 );
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
// convert LE32 to LE64
|
||||
edata[0] = mm128_swap64_32( casti_m128i( pdata, 0 ) );
|
||||
edata[1] = mm128_swap64_32( casti_m128i( pdata, 1 ) );
|
||||
edata[2] = mm128_swap64_32( casti_m128i( pdata, 2 ) );
|
||||
|
@@ -31,26 +31,8 @@
|
||||
|
||||
yespower_params_t yespower_params;
|
||||
|
||||
// master g_work
|
||||
sha256_context yespower_sha256_prehash_ctx;
|
||||
uint32_t _ALIGN(64) yespower_endiandata[20];
|
||||
|
||||
// local work
|
||||
__thread sha256_context sha256_prehash_ctx;
|
||||
|
||||
|
||||
int yespower_sha256_prehash( struct work *work )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
|
||||
for ( int k = 0; k < 19; k++ )
|
||||
be32enc( &yespower_endiandata[k], pdata[k] );
|
||||
|
||||
sha256_ctx_init( &yespower_sha256_prehash_ctx );
|
||||
sha256_update( &yespower_sha256_prehash_ctx, yespower_endiandata, 64 );
|
||||
|
||||
return 1;
|
||||
}
|
||||
// YESPOWER
|
||||
|
||||
int yespower_hash( const char *input, char *output, uint32_t len, int thrid )
|
||||
@@ -71,15 +53,14 @@ int scanhash_yespower( struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
// pthread_rwlock_rdlock( &g_work_lock );
|
||||
|
||||
memcpy( endiandata, yespower_endiandata, sizeof endiandata );
|
||||
memcpy( &sha256_prehash_ctx, &yespower_sha256_prehash_ctx, sizeof sha256_prehash_ctx );
|
||||
|
||||
// pthread_rwlock_unlock( &g_work_lock );
|
||||
|
||||
for ( int k = 0; k < 19; k++ )
|
||||
be32enc( &endiandata[k], pdata[k] );
|
||||
endiandata[19] = n;
|
||||
|
||||
// do sha256 prehash
|
||||
sha256_ctx_init( &sha256_prehash_ctx );
|
||||
sha256_update( &sha256_prehash_ctx, endiandata, 64 );
|
||||
|
||||
do {
|
||||
if ( yespower_hash( (char*)endiandata, (char*)vhash, 80, thr_id ) )
|
||||
if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark )
|
||||
@@ -159,7 +140,6 @@ bool register_yespower_algo( algo_gate_t* gate )
|
||||
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower;
|
||||
gate->prehash = (void*)&yespower_sha256_prehash;
|
||||
gate->hash = (void*)&yespower_hash;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
@@ -174,7 +154,6 @@ bool register_yespowerr16_algo( algo_gate_t* gate )
|
||||
yespower_params.perslen = 0;
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower;
|
||||
gate->prehash = (void*)&yespower_sha256_prehash;
|
||||
gate->hash = (void*)&yespower_hash;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
@@ -186,7 +165,6 @@ bool register_yescrypt_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower;
|
||||
gate->prehash = (void*)&yespower_sha256_prehash;
|
||||
yespower_params.version = YESPOWER_0_5;
|
||||
opt_target_factor = 65536.0;
|
||||
|
||||
@@ -220,7 +198,6 @@ bool register_yescryptr8_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower;
|
||||
gate->prehash = (void*)&yespower_sha256_prehash;
|
||||
yespower_params.version = YESPOWER_0_5;
|
||||
yespower_params.N = 2048;
|
||||
yespower_params.r = 8;
|
||||
@@ -234,7 +211,6 @@ bool register_yescryptr16_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower;
|
||||
gate->prehash = (void*)&yespower_sha256_prehash;
|
||||
yespower_params.version = YESPOWER_0_5;
|
||||
yespower_params.N = 4096;
|
||||
yespower_params.r = 16;
|
||||
@@ -248,7 +224,6 @@ bool register_yescryptr32_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = SSE2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_yespower;
|
||||
gate->prehash = (void*)&yespower_sha256_prehash;
|
||||
yespower_params.version = YESPOWER_0_5;
|
||||
yespower_params.N = 4096;
|
||||
yespower_params.r = 32;
|
||||
|
@@ -80,8 +80,6 @@ extern yespower_params_t yespower_params;
|
||||
|
||||
extern __thread sha256_context sha256_prehash_ctx;
|
||||
|
||||
int yespower_sha256_prehash( struct work *work );
|
||||
|
||||
/**
|
||||
* yespower_init_local(local):
|
||||
* Initialize the thread-local (RAM) data structure. Actual memory allocation
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.21.3])
|
||||
AC_INIT([cpuminer-opt], [3.22.2])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
433
cpu-miner.c
433
cpu-miner.c
@@ -3,7 +3,7 @@
|
||||
* Copyright 2012-2014 pooler
|
||||
* Copyright 2014 Lucas Jones
|
||||
* Copyright 2014-2016 Tanguy Pruvot
|
||||
* Copyright 2016-2021 Jay D Dee
|
||||
* Copyright 2016-2023 Jay D Dee
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
@@ -37,7 +37,7 @@
|
||||
#include <curl/curl.h>
|
||||
#include <jansson.h>
|
||||
#include <openssl/sha.h>
|
||||
#include <mm_malloc.h>
|
||||
//#include <mm_malloc.h>
|
||||
#include "sysinfos.c"
|
||||
#include "algo/sha/sha256d.h"
|
||||
|
||||
@@ -121,7 +121,6 @@ static uint64_t opt_affinity = 0xFFFFFFFFFFFFFFFFULL; // default, use all cores
|
||||
int opt_priority = 0; // deprecated
|
||||
int num_cpus = 1;
|
||||
int num_cpugroups = 1; // For Windows
|
||||
#define max_cpus 256 // max for affinity
|
||||
char *rpc_url = NULL;
|
||||
char *rpc_userpass = NULL;
|
||||
char *rpc_user, *rpc_pass;
|
||||
@@ -224,8 +223,7 @@ char* lp_id;
|
||||
|
||||
static void workio_cmd_free(struct workio_cmd *wc);
|
||||
|
||||
// array mapping thread to cpu
|
||||
static uint8_t thread_affinity_map[ max_cpus ];
|
||||
static int *thread_affinity_map;
|
||||
|
||||
// display affinity mask graphically
|
||||
static void format_affinity_mask( char *mask_str, uint64_t mask )
|
||||
@@ -432,20 +430,18 @@ static bool work_decode( const json_t *val, struct work *work )
|
||||
if ( unlikely( !algo_gate.work_decode( work ) ) )
|
||||
return false;
|
||||
|
||||
if ( !allow_mininginfo )
|
||||
net_diff = algo_gate.calc_network_diff( work );
|
||||
else
|
||||
net_diff = hash_to_diff( work->target );
|
||||
|
||||
work->targetdiff = net_diff;
|
||||
stratum_diff = last_targetdiff = work->targetdiff;
|
||||
// many of these aren't used solo.
|
||||
net_diff =
|
||||
work->targetdiff =
|
||||
stratum_diff =
|
||||
last_targetdiff = hash_to_diff( work->target );
|
||||
work->sharediff = 0;
|
||||
algo_gate.decode_extra_data( work, &net_blocks );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// good alternative for wallet mining, difficulty and net hashrate
|
||||
// Only used for net_hashrate with GBT/getwork, data is from previous block.
|
||||
static const char *info_req =
|
||||
"{\"method\": \"getmininginfo\", \"params\": [], \"id\":8}\r\n";
|
||||
|
||||
@@ -471,17 +467,14 @@ static bool get_mininginfo( CURL *curl, struct work *work )
|
||||
// "networkhashps": 56475980
|
||||
if ( res )
|
||||
{
|
||||
// net_diff is a global that is set from the work hash target by
|
||||
// both getwork and GBT. Don't overwrite it, define a local to override
|
||||
// the global.
|
||||
double net_diff = 0.;
|
||||
double difficulty = 0.;
|
||||
json_t *key = json_object_get( res, "difficulty" );
|
||||
if ( key )
|
||||
{
|
||||
if ( json_is_object( key ) )
|
||||
key = json_object_get( key, "proof-of-work" );
|
||||
if ( json_is_real( key ) )
|
||||
net_diff = json_real_value( key );
|
||||
difficulty = json_real_value( key );
|
||||
}
|
||||
|
||||
key = json_object_get( res, "networkhashps" );
|
||||
@@ -498,12 +491,13 @@ static bool get_mininginfo( CURL *curl, struct work *work )
|
||||
net_blocks = json_integer_value( key );
|
||||
|
||||
if ( opt_debug )
|
||||
applog(LOG_INFO,"Mining info: diff %.5g, net_hashrate %f, height %d",
|
||||
net_diff, net_hashrate, net_blocks );
|
||||
|
||||
applog( LOG_INFO,"getmininginfo: difficulty %.5g, networkhashps %.5g, blocks %d", difficulty, net_hashrate, net_blocks );
|
||||
|
||||
if ( !work->height )
|
||||
{
|
||||
// complete missing data from getwork
|
||||
if ( opt_debug )
|
||||
applog( LOG_DEBUG, "work height set by getmininginfo" );
|
||||
work->height = (uint32_t) net_blocks + 1;
|
||||
if ( work->height > g_work.height )
|
||||
restart_threads();
|
||||
@@ -535,9 +529,8 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
json_t *tmp, *txa;
|
||||
bool rc = false;
|
||||
int i, n;
|
||||
|
||||
// Segwit BEGIN
|
||||
bool segwit = false;
|
||||
|
||||
tmp = json_object_get( val, "rules" );
|
||||
if ( tmp && json_is_array( tmp ) )
|
||||
{
|
||||
@@ -555,8 +548,7 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
}
|
||||
}
|
||||
}
|
||||
// Segwit END
|
||||
|
||||
|
||||
tmp = json_object_get( val, "mutable" );
|
||||
if ( tmp && json_is_array( tmp ) )
|
||||
{
|
||||
@@ -638,7 +630,7 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* find count and size of transactions */
|
||||
txa = json_object_get(val, "transactions" );
|
||||
if ( !txa || !json_is_array( txa ) )
|
||||
@@ -713,12 +705,7 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
cbtx[41] = cbtx_size - 42; /* scriptsig length */
|
||||
le32enc( (uint32_t *)( cbtx+cbtx_size ), 0xffffffff ); /* sequence */
|
||||
cbtx_size += 4;
|
||||
|
||||
// Segwit BEGIN
|
||||
//cbtx[cbtx_size++] = 1; /* out-counter */
|
||||
cbtx[cbtx_size++] = segwit ? 2 : 1; /* out-counter */
|
||||
// Segwit END
|
||||
|
||||
cbtx[cbtx_size++] = segwit ? 2 : 1; /* out-counter */
|
||||
le32enc( (uint32_t *)( cbtx+cbtx_size) , (uint32_t)cbvalue ); /* value */
|
||||
le32enc( (uint32_t *)( cbtx+cbtx_size+4 ), cbvalue >> 32 );
|
||||
cbtx_size += 8;
|
||||
@@ -726,7 +713,6 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
memcpy( cbtx+cbtx_size, pk_script, pk_script_size );
|
||||
cbtx_size += (int) pk_script_size;
|
||||
|
||||
// Segwit BEGIN
|
||||
if ( segwit )
|
||||
{
|
||||
unsigned char (*wtree)[32] = calloc(tx_count + 2, 32);
|
||||
@@ -761,12 +747,11 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
for ( i = 0; i < n; i++ )
|
||||
sha256d( wtree[i], wtree[2*i], 64 );
|
||||
}
|
||||
memset( wtree[1], 0, 32 ); /* witness reserved value = 0 */
|
||||
memset( wtree[1], 0, 32 ); // witness reserved value = 0
|
||||
sha256d( cbtx+cbtx_size, wtree[0], 64 );
|
||||
cbtx_size += 32;
|
||||
free( wtree );
|
||||
}
|
||||
// Segwit END
|
||||
|
||||
le32enc( (uint32_t *)( cbtx+cbtx_size ), 0 ); /* lock time */
|
||||
cbtx_size += 4;
|
||||
@@ -785,10 +770,8 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
xsig_len += n;
|
||||
}
|
||||
else
|
||||
{
|
||||
applog( LOG_WARNING,
|
||||
"Signature does not fit in coinbase, skipping" );
|
||||
}
|
||||
}
|
||||
tmp = json_object_get( val, "coinbaseaux" );
|
||||
if ( tmp && json_is_object( tmp ) )
|
||||
@@ -815,8 +798,8 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
if ( xsig_len )
|
||||
{
|
||||
unsigned char *ssig_end = cbtx + 42 + cbtx[41];
|
||||
int push_len = cbtx[41] + xsig_len < 76 ? 1 :
|
||||
cbtx[41] + 2 + xsig_len > 100 ? 0 : 2;
|
||||
int push_len = cbtx[41] + xsig_len < 76
|
||||
? 1 : cbtx[41] + 2 + xsig_len > 100 ? 0 : 2;
|
||||
n = xsig_len + push_len;
|
||||
memmove( ssig_end + n, ssig_end, cbtx_size - 42 - cbtx[41] );
|
||||
cbtx[41] += n;
|
||||
@@ -843,7 +826,6 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
const char *tx_hex = json_string_value( json_object_get( tmp, "data" ) );
|
||||
const int tx_size = tx_hex ? (int) ( strlen( tx_hex ) / 2 ) : 0;
|
||||
|
||||
// Segwit BEGIN
|
||||
if ( segwit )
|
||||
{
|
||||
const char *txid = json_string_value( json_object_get( tmp, "txid" ) );
|
||||
@@ -856,8 +838,6 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
}
|
||||
else
|
||||
{
|
||||
// Segwit END
|
||||
|
||||
unsigned char *tx = (uchar*) malloc( tx_size );
|
||||
if ( !tx_hex || !hex2bin( tx, tx_hex, tx_size ) )
|
||||
{
|
||||
@@ -867,10 +847,7 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
}
|
||||
sha256d( merkle_tree[1 + i], tx, tx_size );
|
||||
free( tx );
|
||||
|
||||
// Segwit BEGIN
|
||||
}
|
||||
// Segwit END
|
||||
|
||||
if ( !submit_coinbase )
|
||||
strcat( work->txs, tx_hex );
|
||||
@@ -888,6 +865,8 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
sha256d( merkle_tree[i], merkle_tree[2*i], 64 );
|
||||
}
|
||||
|
||||
work->tx_count = tx_count;
|
||||
|
||||
/* assemble block header */
|
||||
algo_gate.build_block_header( work, swab32( version ),
|
||||
(uint32_t*) prevhash, (uint32_t*) merkle_tree,
|
||||
@@ -900,21 +879,11 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
|
||||
goto out;
|
||||
}
|
||||
|
||||
// See git issue https://github.com/JayDDee/cpuminer-opt/issues/379
|
||||
#if defined(__AVX2__)
|
||||
if ( opt_debug )
|
||||
{
|
||||
if ( (uint64_t)target % 32 )
|
||||
applog( LOG_ERR, "Misaligned target %p", target );
|
||||
if ( (uint64_t)(work->target) % 32 )
|
||||
applog( LOG_ERR, "Misaligned work->target %p", work->target );
|
||||
}
|
||||
#endif
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
work->target[7 - i] = be32dec( target + i );
|
||||
// reverse the bytes in target
|
||||
casti_m128i( work->target, 0 ) = mm128_bswap_128( casti_m128i( target, 1 ) );
|
||||
casti_m128i( work->target, 1 ) = mm128_bswap_128( casti_m128i( target, 0 ) );
|
||||
net_diff = work->targetdiff = hash_to_diff( work->target );
|
||||
|
||||
|
||||
tmp = json_object_get( val, "workid" );
|
||||
if ( tmp )
|
||||
{
|
||||
@@ -1090,12 +1059,11 @@ void report_summary_log( bool force )
|
||||
timeval_subtract( &et, &now, &start_time );
|
||||
timeval_subtract( &uptime, &total_hashes_time, &session_start );
|
||||
|
||||
double share_time = (double)et.tv_sec + (double)et.tv_usec / 1e6;
|
||||
double share_time = (double)et.tv_sec + (double)et.tv_usec * 1e-6;
|
||||
double ghrate = safe_div( total_hashes, (double)uptime.tv_sec, 0. );
|
||||
double target_diff = exp32 * last_targetdiff;
|
||||
double shrate = safe_div( target_diff * (double)(accepts),
|
||||
share_time, 0. );
|
||||
// global_hashrate = ghrate;
|
||||
double sess_hrate = safe_div( exp32 * norm_diff_sum,
|
||||
(double)uptime.tv_sec, 0. );
|
||||
double submit_rate = safe_div( (double)submits * 60., share_time, 0. );
|
||||
@@ -1116,7 +1084,7 @@ void report_summary_log( bool force )
|
||||
applog2( LOG_NOTICE, "Periodic Report %s %s", et_str, upt_str );
|
||||
applog2( LOG_INFO, "Share rate %.2f/min %.2f/min",
|
||||
submit_rate, safe_div( (double)submitted_share_count*60.,
|
||||
( (double)uptime.tv_sec + (double)uptime.tv_usec / 1e6 ), 0. ) );
|
||||
( (double)uptime.tv_sec + (double)uptime.tv_usec * 1e-6 ), 0. ) );
|
||||
applog2( LOG_INFO, "Hash rate %7.2f%sh/s %7.2f%sh/s (%.2f%sh/s)",
|
||||
shrate, shr_units, sess_hrate, sess_hr_units, ghrate, ghr_units );
|
||||
|
||||
@@ -1563,7 +1531,6 @@ const char *getwork_req =
|
||||
|
||||
#define GBT_CAPABILITIES "[\"coinbasetxn\", \"coinbasevalue\", \"longpoll\", \"workid\"]"
|
||||
|
||||
// Segwit BEGIN
|
||||
#define GBT_RULES "[\"segwit\"]"
|
||||
static const char *gbt_req =
|
||||
"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
|
||||
@@ -1572,16 +1539,6 @@ const char *gbt_lp_req =
|
||||
"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
|
||||
GBT_CAPABILITIES ", \"rules\": " GBT_RULES ", \"longpollid\": \"%s\"}], \"id\":0}\r\n";
|
||||
|
||||
/*
|
||||
static const char *gbt_req =
|
||||
"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
|
||||
GBT_CAPABILITIES "}], \"id\":0}\r\n";
|
||||
const char *gbt_lp_req =
|
||||
"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
|
||||
GBT_CAPABILITIES ", \"longpollid\": \"%s\"}], \"id\":0}\r\n";
|
||||
*/
|
||||
// Segwit END
|
||||
|
||||
static bool get_upstream_work( CURL *curl, struct work *work )
|
||||
{
|
||||
json_t *val;
|
||||
@@ -1656,49 +1613,49 @@ start:
|
||||
last_block_height = work->height;
|
||||
last_targetdiff = net_diff;
|
||||
|
||||
applog( LOG_BLUE, "New Block %d, Net Diff %.5g, Ntime %08x",
|
||||
work->height, net_diff,
|
||||
applog( LOG_BLUE, "New Block %d, Tx %d, Net Diff %.5g, Ntime %08x",
|
||||
work->height, work->tx_count, net_diff,
|
||||
work->data[ algo_gate.ntime_index ] );
|
||||
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
double miner_hr = 0.;
|
||||
double net_hr = net_hashrate;
|
||||
double nd = net_diff * exp32;
|
||||
char net_hr_units[4] = {0};
|
||||
char miner_hr_units[4] = {0};
|
||||
char net_ttf[32];
|
||||
char miner_ttf[32];
|
||||
|
||||
pthread_mutex_lock( &stats_lock );
|
||||
|
||||
for ( int i = 0; i < opt_n_threads; i++ )
|
||||
miner_hr += thr_hashrates[i];
|
||||
global_hashrate = miner_hr;
|
||||
|
||||
pthread_mutex_unlock( &stats_lock );
|
||||
|
||||
if ( net_hr > 0. )
|
||||
sprintf_et( net_ttf, nd / net_hr );
|
||||
else
|
||||
sprintf( net_ttf, "NA" );
|
||||
if ( miner_hr > 0. )
|
||||
sprintf_et( miner_ttf, nd / miner_hr );
|
||||
else
|
||||
sprintf( miner_ttf, "NA" );
|
||||
|
||||
scale_hash_for_display ( &miner_hr, miner_hr_units );
|
||||
scale_hash_for_display ( &net_hr, net_hr_units );
|
||||
applog2( LOG_INFO,
|
||||
"Miner TTF @ %.2f %sh/s %s, Net TTF @ %.2f %sh/s %s",
|
||||
miner_hr, miner_hr_units, miner_ttf, net_hr,
|
||||
net_hr_units, net_ttf );
|
||||
}
|
||||
} // work->height > last_block_height
|
||||
}
|
||||
else if ( memcmp( &work->data[1], &g_work.data[1], 32 ) )
|
||||
applog( LOG_BLUE, "New Work: Block %d, Net Diff %.5g, Ntime %08x",
|
||||
work->height, net_diff,
|
||||
work->data[ algo_gate.ntime_index ] );
|
||||
applog( LOG_BLUE, "New Work: Block %d, Tx %d, Net Diff %.5g, Ntime %08x",
|
||||
work->height, work->tx_count, net_diff,
|
||||
work->data[ algo_gate.ntime_index ] );
|
||||
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
double miner_hr = 0.;
|
||||
double net_hr = net_hashrate;
|
||||
double nd = net_diff * exp32;
|
||||
char net_hr_units[4] = {0};
|
||||
char miner_hr_units[4] = {0};
|
||||
char net_ttf[32];
|
||||
char miner_ttf[32];
|
||||
|
||||
pthread_mutex_lock( &stats_lock );
|
||||
|
||||
for ( int i = 0; i < opt_n_threads; i++ )
|
||||
miner_hr += thr_hashrates[i];
|
||||
global_hashrate = miner_hr;
|
||||
|
||||
pthread_mutex_unlock( &stats_lock );
|
||||
|
||||
if ( net_hr > 0. )
|
||||
sprintf_et( net_ttf, nd / net_hr );
|
||||
else
|
||||
sprintf( net_ttf, "NA" );
|
||||
if ( miner_hr > 0. )
|
||||
sprintf_et( miner_ttf, nd / miner_hr );
|
||||
else
|
||||
sprintf( miner_ttf, "NA" );
|
||||
|
||||
scale_hash_for_display ( &miner_hr, miner_hr_units );
|
||||
scale_hash_for_display ( &net_hr, net_hr_units );
|
||||
applog2( LOG_INFO,
|
||||
"Miner TTF @ %.2f %sh/s %s, Net TTF @ %.2f %sh/s %s",
|
||||
miner_hr, miner_hr_units, miner_ttf, net_hr,
|
||||
net_hr_units, net_ttf );
|
||||
}
|
||||
} // rc
|
||||
|
||||
return rc;
|
||||
@@ -1724,20 +1681,19 @@ static void workio_cmd_free(struct workio_cmd *wc)
|
||||
|
||||
static bool workio_get_work( struct workio_cmd *wc, CURL *curl )
|
||||
{
|
||||
struct work *ret_work;
|
||||
struct work *work_heap;
|
||||
int failures = 0;
|
||||
|
||||
ret_work = (struct work*) _mm_malloc( sizeof(*ret_work), 32 );
|
||||
if ( !ret_work ) return false;
|
||||
memset( ret_work, 0, sizeof(*ret_work) );
|
||||
work_heap = calloc( 1, sizeof(struct work) );
|
||||
if ( !work_heap ) return false;
|
||||
|
||||
/* obtain new work from bitcoin via JSON-RPC */
|
||||
while ( !get_upstream_work( curl, ret_work ) )
|
||||
while ( !get_upstream_work( curl, work_heap ) )
|
||||
{
|
||||
if ( unlikely( ( opt_retries >= 0 ) && ( ++failures > opt_retries ) ) )
|
||||
{
|
||||
applog( LOG_ERR, "json_rpc_call failed, terminating workio thread" );
|
||||
free( ret_work );
|
||||
free( work_heap );
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1748,8 +1704,8 @@ static bool workio_get_work( struct workio_cmd *wc, CURL *curl )
|
||||
}
|
||||
|
||||
/* send work to requesting thread */
|
||||
if ( !tq_push(wc->thr->q, ret_work ) )
|
||||
free( ret_work );
|
||||
if ( !tq_push(wc->thr->q, work_heap ) )
|
||||
free( work_heap );
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -1825,7 +1781,7 @@ static void *workio_thread(void *userdata)
|
||||
static bool get_work(struct thr_info *thr, struct work *work)
|
||||
{
|
||||
struct workio_cmd *wc;
|
||||
struct work *work_heap;
|
||||
struct work *work_heap;
|
||||
|
||||
if unlikely( opt_benchmark )
|
||||
{
|
||||
@@ -1850,17 +1806,16 @@ static bool get_work(struct thr_info *thr, struct work *work)
|
||||
wc->thr = thr;
|
||||
/* send work request to workio thread */
|
||||
if (!tq_push(thr_info[work_thr_id].q, wc))
|
||||
{
|
||||
{
|
||||
workio_cmd_free(wc);
|
||||
return false;
|
||||
}
|
||||
/* wait for response, a unit of work */
|
||||
work_heap = (struct work*) tq_pop(thr->q, NULL);
|
||||
if (!work_heap)
|
||||
return false;
|
||||
/* copy returned work into storage provided by caller */
|
||||
memcpy(work, work_heap, sizeof(*work));
|
||||
free(work_heap);
|
||||
if ( !work_heap ) return false;
|
||||
/* copy returned work into storage provided by caller */
|
||||
memcpy( work, work_heap, sizeof(*work) );
|
||||
free( work_heap );
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1910,9 +1865,9 @@ static void update_submit_stats( struct work *work, const void *hash )
|
||||
bool submit_solution( struct work *work, const void *hash,
|
||||
struct thr_info *thr )
|
||||
{
|
||||
// Job went stale during hashing of a valid share.
|
||||
if ( !opt_quiet && work_restart[ thr->id ].restart )
|
||||
applog( LOG_INFO, CL_LBL "Share may be stale, submitting anyway..." CL_N );
|
||||
// Job went stale during hashing of a valid share.
|
||||
// if ( !opt_quiet && work_restart[ thr->id ].restart )
|
||||
// applog( LOG_INFO, CL_LBL "Share may be stale, submitting anyway..." CL_N );
|
||||
|
||||
work->sharediff = hash_to_diff( hash );
|
||||
if ( likely( submit_work( thr, work ) ) )
|
||||
@@ -1930,32 +1885,34 @@ bool submit_solution( struct work *work, const void *hash,
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
if ( have_stratum )
|
||||
{
|
||||
applog( LOG_INFO, "%d Submitted Diff %.5g, Block %d, Job %s",
|
||||
submitted_share_count, work->sharediff, work->height,
|
||||
work->job_id );
|
||||
if ( opt_debug && opt_extranonce )
|
||||
{
|
||||
unsigned char *xnonce2str = abin2hex( work->xnonce2,
|
||||
work->xnonce2_len );
|
||||
applog( LOG_INFO, "Xnonce2 %s", xnonce2str );
|
||||
free( xnonce2str );
|
||||
}
|
||||
}
|
||||
else
|
||||
applog( LOG_INFO, "%d Submitted Diff %.5g, Block %d, Ntime %08x",
|
||||
submitted_share_count, work->sharediff, work->height,
|
||||
work->data[ algo_gate.ntime_index ] );
|
||||
}
|
||||
|
||||
if ( opt_debug )
|
||||
{
|
||||
uint32_t* h = (uint32_t*)hash;
|
||||
uint32_t* t = (uint32_t*)work->target;
|
||||
uint32_t* d = (uint32_t*)work->data;
|
||||
if ( opt_debug )
|
||||
{
|
||||
uint32_t* h = (uint32_t*)hash;
|
||||
uint32_t* t = (uint32_t*)work->target;
|
||||
uint32_t* d = (uint32_t*)work->data;
|
||||
|
||||
unsigned char *xnonce2str = abin2hex( work->xnonce2,
|
||||
work->xnonce2_len );
|
||||
applog(LOG_INFO,"Thread %d, Nonce %08x, Xnonce2 %s", thr->id,
|
||||
work->data[ algo_gate.nonce_index ], xnonce2str );
|
||||
free( xnonce2str );
|
||||
applog(LOG_INFO,"Data[0:19]: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x", d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7],d[8],d[9] );
|
||||
applog(LOG_INFO," : %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x", d[10],d[11],d[12],d[13],d[14],d[15],d[16],d[17],d[18],d[19]);
|
||||
applog(LOG_INFO,"Hash[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
|
||||
h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0]);
|
||||
applog(LOG_INFO,"Targ[7:0]: %08x %08x %08x %08x %08x %08x %08x %08x",
|
||||
t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0]);
|
||||
applog( LOG_INFO, "Data[ 0: 9]: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x", d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7],d[8],d[9] );
|
||||
applog( LOG_INFO, "Data[10:19]: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x", d[10],d[11],d[12],d[13],d[14],d[15],d[16],d[17],d[18],d[19] );
|
||||
applog( LOG_INFO, "Hash[ 7: 0]: %08x %08x %08x %08x %08x %08x %08x %08x", h[7],h[6],h[5],h[4],h[3],h[2],h[1],h[0] );
|
||||
applog( LOG_INFO, "Targ[ 7: 0]: %08x %08x %08x %08x %08x %08x %08x %08x", t[7],t[6],t[5],t[4],t[3],t[2],t[1],t[0] );
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -2031,33 +1988,6 @@ void set_work_data_big_endian( struct work *work )
|
||||
be32enc( work->data + i, work->data[i] );
|
||||
}
|
||||
|
||||
// calculate net diff from nbits.
|
||||
double std_calc_network_diff( struct work* work )
|
||||
{
|
||||
uint32_t nbits = work->data[ algo_gate.nbits_index ];
|
||||
uint32_t shift = nbits & 0xff;
|
||||
uint32_t bits = bswap_32( nbits ) & 0x00ffffff;
|
||||
/*
|
||||
// sample for diff 43.281 : 1c05ea29
|
||||
// todo: endian reversed on longpoll could be zr5 specific...
|
||||
int nbits_index = algo_gate.nbits_index;
|
||||
uint32_t nbits = have_longpoll ? work->data[ nbits_index]
|
||||
: swab32( work->data[ nbits_index ] );
|
||||
uint32_t bits = ( nbits & 0xffffff );
|
||||
int16_t shift = ( swab32(nbits) & 0xff ); // 0x1c = 28
|
||||
*/
|
||||
|
||||
int m;
|
||||
long double d = (long double)0x0000ffff / (long double)bits;
|
||||
for ( m = shift; m < 29; m++ )
|
||||
d *= 256.0;
|
||||
for ( m = 29; m < shift; m++ )
|
||||
d /= 256.0;
|
||||
if ( opt_debug_diff )
|
||||
applog(LOG_DEBUG, "net diff: %8f -> shift %u, bits %08x", (double)d, shift, bits);
|
||||
return (double)d;
|
||||
}
|
||||
|
||||
void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
uint32_t *end_nonce_ptr )
|
||||
{
|
||||
@@ -2081,17 +2011,6 @@ void std_get_new_work( struct work* work, struct work* g_work, int thr_id,
|
||||
++(*nonceptr);
|
||||
}
|
||||
|
||||
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
|
||||
int thr_id )
|
||||
{
|
||||
if ( have_stratum && !work->data[0] && !opt_benchmark )
|
||||
{
|
||||
sleep(1);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
{
|
||||
bool new_job;
|
||||
@@ -2108,7 +2027,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
g_work->xnonce2 = (uchar*) realloc( g_work->xnonce2, sctx->xnonce2_size );
|
||||
memcpy( g_work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size );
|
||||
algo_gate.build_extraheader( g_work, sctx );
|
||||
net_diff = algo_gate.calc_network_diff( g_work );
|
||||
net_diff = nbits_to_diff( g_work->data[ algo_gate.nbits_index ] );
|
||||
algo_gate.set_work_data_endian( g_work );
|
||||
g_work->height = sctx->block_height;
|
||||
g_work->targetdiff = sctx->job.diff
|
||||
@@ -2122,10 +2041,6 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
t++ );
|
||||
|
||||
g_work_time = time(NULL);
|
||||
|
||||
// Do midstate prehash
|
||||
algo_gate.prehash( g_work );
|
||||
|
||||
restart_threads();
|
||||
|
||||
pthread_mutex_unlock( &sctx->work_lock );
|
||||
@@ -2141,15 +2056,18 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
pthread_mutex_unlock( &stats_lock );
|
||||
|
||||
if ( stratum_diff != sctx->job.diff )
|
||||
applog( LOG_BLUE, "New Stratum Diff %g, Block %d, Job %s",
|
||||
sctx->job.diff, sctx->block_height, g_work->job_id );
|
||||
applog( LOG_BLUE, "New Stratum Diff %g, Block %d, Tx %d, Job %s",
|
||||
sctx->job.diff, sctx->block_height,
|
||||
sctx->job.merkle_count, g_work->job_id );
|
||||
else if ( last_block_height != sctx->block_height )
|
||||
applog( LOG_BLUE, "New Block %d, Net diff %.5g, Job %s",
|
||||
sctx->block_height, net_diff, g_work->job_id );
|
||||
applog( LOG_BLUE, "New Block %d, Tx %d, Netdiff %.5g, Job %s",
|
||||
sctx->block_height, sctx->job.merkle_count,
|
||||
net_diff, g_work->job_id );
|
||||
else if ( g_work->job_id && new_job )
|
||||
applog( LOG_BLUE, "New Work: Block %d, Net diff %.5g, Job %s",
|
||||
sctx->block_height, net_diff, g_work->job_id );
|
||||
else if ( opt_debug )
|
||||
applog( LOG_BLUE, "New Work: Block %d, Tx %d, Netdiff %.5g, Job %s",
|
||||
sctx->block_height, sctx->job.merkle_count,
|
||||
net_diff, g_work->job_id );
|
||||
else if ( !opt_quiet )
|
||||
{
|
||||
unsigned char *xnonce2str = bebin2hex( g_work->xnonce2,
|
||||
g_work->xnonce2_len );
|
||||
@@ -2162,8 +2080,6 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
if ( ( stratum_diff != sctx->job.diff )
|
||||
|| ( last_block_height != sctx->block_height ) )
|
||||
{
|
||||
static bool multipool = false;
|
||||
if ( stratum.block_height < last_block_height ) multipool = true;
|
||||
if ( unlikely( !session_first_block ) )
|
||||
session_first_block = stratum.block_height;
|
||||
last_block_height = stratum.block_height;
|
||||
@@ -2171,58 +2087,47 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
last_targetdiff = g_work->targetdiff;
|
||||
if ( lowest_share < last_targetdiff )
|
||||
lowest_share = 9e99;
|
||||
}
|
||||
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
applog2( LOG_INFO, "Diff: Net %.5g, Stratum %.5g, Target %.5g",
|
||||
net_diff, stratum_diff, g_work->targetdiff );
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
applog2( LOG_INFO, "Diff: Net %.5g, Stratum %.5g, Target %.5g",
|
||||
net_diff, stratum_diff, g_work->targetdiff );
|
||||
|
||||
if ( likely( hr > 0. ) )
|
||||
{
|
||||
double nd = net_diff * exp32;
|
||||
char hr_units[4] = {0};
|
||||
char block_ttf[32];
|
||||
char share_ttf[32];
|
||||
if ( likely( hr > 0. ) )
|
||||
{
|
||||
double nd = net_diff * exp32;
|
||||
char hr_units[4] = {0};
|
||||
char block_ttf[32];
|
||||
char share_ttf[32];
|
||||
static bool multipool = false;
|
||||
|
||||
if ( stratum.block_height < last_block_height ) multipool = true;
|
||||
|
||||
sprintf_et( block_ttf, nd / hr );
|
||||
sprintf_et( share_ttf, ( g_work->targetdiff * exp32 ) / hr );
|
||||
scale_hash_for_display ( &hr, hr_units );
|
||||
applog2( LOG_INFO, "TTF @ %.2f %sh/s: Block %s, Share %s",
|
||||
hr, hr_units, block_ttf, share_ttf );
|
||||
|
||||
sprintf_et( block_ttf, nd / hr );
|
||||
sprintf_et( share_ttf, ( g_work->targetdiff * exp32 ) / hr );
|
||||
scale_hash_for_display ( &hr, hr_units );
|
||||
applog2( LOG_INFO, "TTF @ %.2f %sh/s: Block %s, Share %s",
|
||||
hr, hr_units, block_ttf, share_ttf );
|
||||
|
||||
if ( !multipool && last_block_height > session_first_block )
|
||||
{
|
||||
struct timeval now, et;
|
||||
gettimeofday( &now, NULL );
|
||||
timeval_subtract( &et, &now, &session_start );
|
||||
uint64_t net_ttf =
|
||||
( last_block_height - session_first_block ) == 0 ? 0
|
||||
: et.tv_sec / ( last_block_height - session_first_block );
|
||||
if ( net_diff > 0. && net_ttf )
|
||||
{
|
||||
double net_hr = nd / net_ttf;
|
||||
char net_hr_units[4] = {0};
|
||||
scale_hash_for_display ( &net_hr, net_hr_units );
|
||||
applog2( LOG_INFO, "Net hash rate (est) %.2f %sh/s",
|
||||
net_hr, net_hr_units );
|
||||
}
|
||||
}
|
||||
} // hr > 0
|
||||
} // !quiet
|
||||
} // new diff/block
|
||||
|
||||
/*
|
||||
if ( new_job && !( opt_quiet || stratum_errors ) )
|
||||
{
|
||||
int mismatch = submitted_share_count - ( accepted_share_count
|
||||
+ stale_share_count
|
||||
+ rejected_share_count );
|
||||
if ( mismatch )
|
||||
applog( LOG_INFO,
|
||||
CL_LBL "%d Submitted share pending, maybe stale" CL_N,
|
||||
submitted_share_count );
|
||||
}
|
||||
*/
|
||||
if ( !multipool && last_block_height > session_first_block )
|
||||
{
|
||||
struct timeval now, et;
|
||||
gettimeofday( &now, NULL );
|
||||
timeval_subtract( &et, &now, &session_start );
|
||||
uint64_t net_ttf = safe_div( et.tv_sec,
|
||||
last_block_height - session_first_block, 0 );
|
||||
if ( net_diff > 0. && net_ttf )
|
||||
{
|
||||
double net_hr = safe_div( nd, net_ttf, 0. );
|
||||
char net_hr_units[4] = {0};
|
||||
scale_hash_for_display ( &net_hr, net_hr_units );
|
||||
applog2( LOG_INFO, "Net hash rate (est) %.2f %sh/s",
|
||||
net_hr, net_hr_units );
|
||||
}
|
||||
}
|
||||
} // hr > 0
|
||||
} // !quiet
|
||||
}
|
||||
|
||||
static void *miner_thread( void *userdata )
|
||||
@@ -2344,9 +2249,6 @@ static void *miner_thread( void *userdata )
|
||||
goto out;
|
||||
}
|
||||
g_work_time = time(NULL);
|
||||
|
||||
// do midstate prehash
|
||||
algo_gate.prehash( &g_work );
|
||||
restart_threads();
|
||||
}
|
||||
|
||||
@@ -2363,9 +2265,6 @@ static void *miner_thread( void *userdata )
|
||||
} // do_this_thread
|
||||
algo_gate.resync_threads( thr_id, &work );
|
||||
|
||||
if ( unlikely( !algo_gate.ready_to_mine( &work, &stratum, thr_id ) ) )
|
||||
continue;
|
||||
|
||||
// conditional mining
|
||||
if ( unlikely( !wanna_mine( thr_id ) ) )
|
||||
{
|
||||
@@ -3745,7 +3644,6 @@ int main(int argc, char *argv[])
|
||||
if ( opt_time_limit )
|
||||
time_limit_stop = (unsigned int)time(NULL) + opt_time_limit;
|
||||
|
||||
|
||||
// need to register to get algo optimizations for cpu capabilities
|
||||
// but that causes registration logs before cpu capabilities is output.
|
||||
// Would need to split register function into 2 parts. First part sets algo
|
||||
@@ -3874,24 +3772,29 @@ int main(int argc, char *argv[])
|
||||
#endif
|
||||
|
||||
#if defined(WIN32) && defined(WINDOWS_CPU_GROUPS_ENABLED)
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_INFO, "Found %d CPUs in %d groups", num_cpus, num_cpugroups );
|
||||
if ( opt_debug || ( !opt_quiet && num_cpugroups > 1 ) )
|
||||
applog( LOG_INFO, "Found %d CPUs in %d groups",
|
||||
num_cpus, num_cpugroups );
|
||||
#endif
|
||||
|
||||
if ( opt_affinity && num_cpus > max_cpus )
|
||||
const int map_size = opt_n_threads < num_cpus ? num_cpus : opt_n_threads;
|
||||
thread_affinity_map = malloc( map_size * (sizeof (int)) );
|
||||
if ( !thread_affinity_map )
|
||||
{
|
||||
applog( LOG_WARNING, "More than %d CPUs, CPU affinity is disabled",
|
||||
max_cpus );
|
||||
applog( LOG_ERR, "CPU Affinity disabled, memory allocation failed" );
|
||||
opt_affinity = 0ULL;
|
||||
}
|
||||
|
||||
}
|
||||
if ( opt_affinity )
|
||||
{
|
||||
for ( int thr = 0, cpu = 0; thr < opt_n_threads; thr++, cpu++ )
|
||||
int active_cpus = 0; // total CPUs available using rolling affinity mask
|
||||
for ( int thr = 0, cpu = 0; thr < map_size; thr++, cpu++ )
|
||||
{
|
||||
while ( !( ( opt_affinity >> ( cpu & 63 ) ) & 1ULL ) ) cpu++;
|
||||
thread_affinity_map[ thr ] = cpu % num_cpus;
|
||||
if ( cpu < num_cpus ) active_cpus++;
|
||||
}
|
||||
if ( opt_n_threads > active_cpus )
|
||||
applog( LOG_WARNING, "Affinity: more threads (%d) than active CPUs (%d)", opt_n_threads, active_cpus );
|
||||
if ( !opt_quiet )
|
||||
{
|
||||
char affinity_mask[64];
|
||||
|
47
miner.h
47
miner.h
@@ -24,6 +24,11 @@
|
||||
|
||||
#endif /* _MSC_VER */
|
||||
|
||||
// prevent questions from ARM users that don't read the requirements.
|
||||
#if !defined(__x86_64__)
|
||||
#error "CPU architecture not supported. Consult the requirements for supported CPUs."
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <inttypes.h>
|
||||
#include <sys/time.h>
|
||||
@@ -91,6 +96,19 @@ enum {
|
||||
LOG_PINK = 0x14 };
|
||||
#endif
|
||||
|
||||
#define WORK_ALIGNMENT 64
|
||||
|
||||
// When working with dynamically allocated memory to guarantee data alignment
|
||||
// for large vectors. Physical block size must be extended by alignment number
|
||||
// of bytes when allocated. free() should use the physical pointer returned by
|
||||
// malloc(), not the aligned pointer. All others shoujld use the logical,
|
||||
// aligned, pointer returned by this function.
|
||||
static inline void *align_ptr( const void *ptr, const uint64_t alignment )
|
||||
{
|
||||
const uint64_t mask = alignment - 1;
|
||||
return (void*)( ( ((const uint64_t)ptr) + mask ) & (~mask) );
|
||||
}
|
||||
|
||||
extern bool is_power_of_2( int n );
|
||||
|
||||
static inline bool is_windows(void)
|
||||
@@ -317,7 +335,7 @@ extern void cbin2hex(char *out, const char *in, size_t len);
|
||||
void bin2hex( char *s, const unsigned char *p, size_t len );
|
||||
char *abin2hex( const unsigned char *p, size_t len );
|
||||
char *bebin2hex( const unsigned char *p, size_t len );
|
||||
bool hex2bin( unsigned char *p, const char *hexstr, size_t len );
|
||||
bool hex2bin( unsigned char *p, const char *hexstr, const size_t len );
|
||||
bool jobj_binary( const json_t *obj, const char *key, void *buf,
|
||||
size_t buflen );
|
||||
int varint_encode( unsigned char *p, uint64_t n );
|
||||
@@ -333,10 +351,7 @@ extern void memrev(unsigned char *p, size_t len);
|
||||
// number of hashes.
|
||||
//
|
||||
// https://en.bitcoin.it/wiki/Difficulty
|
||||
//
|
||||
// hash = diff * 2**32
|
||||
//
|
||||
// diff_to_hash = 2**32 = 0x100000000 = 4294967296 = exp32;
|
||||
|
||||
#define EXP16 65536.
|
||||
#define EXP32 4294967296.
|
||||
@@ -350,8 +365,9 @@ extern const long double exp160; // 2**160
|
||||
bool fulltest( const uint32_t *hash, const uint32_t *target );
|
||||
bool valid_hash( const void*, const void* );
|
||||
|
||||
double hash_to_diff( const void* );
|
||||
extern double hash_to_diff( const void* );
|
||||
extern void diff_to_hash( uint32_t*, const double );
|
||||
extern double nbits_to_diff( uint32_t );
|
||||
|
||||
double hash_target_ratio( uint32_t* hash, uint32_t* target );
|
||||
void work_set_target_ratio( struct work* work, const void *hash );
|
||||
@@ -392,20 +408,21 @@ float cpu_temp( int core );
|
||||
|
||||
struct work
|
||||
{
|
||||
uint32_t data[48] __attribute__ ((aligned (64)));
|
||||
uint32_t target[8] __attribute__ ((aligned (32)));
|
||||
uint32_t target[8] __attribute__ ((aligned (64)));
|
||||
uint32_t data[48] __attribute__ ((aligned (64)));
|
||||
double targetdiff;
|
||||
double sharediff;
|
||||
double stratum_diff;
|
||||
int height;
|
||||
char *txs;
|
||||
char *workid;
|
||||
int tx_count;
|
||||
char *workid;
|
||||
char *job_id;
|
||||
size_t xnonce2_len;
|
||||
unsigned char *xnonce2;
|
||||
bool sapling;
|
||||
bool stale;
|
||||
} __attribute__ ((aligned (64)));
|
||||
} __attribute__ ((aligned (WORK_ALIGNMENT)));
|
||||
|
||||
struct stratum_job
|
||||
{
|
||||
@@ -416,7 +433,8 @@ struct stratum_job
|
||||
unsigned char *coinbase;
|
||||
unsigned char *xnonce2;
|
||||
int merkle_count;
|
||||
unsigned char **merkle;
|
||||
int merkle_buf_size;
|
||||
unsigned char **merkle;
|
||||
unsigned char version[4];
|
||||
unsigned char nbits[4];
|
||||
unsigned char ntime[4];
|
||||
@@ -540,7 +558,6 @@ enum algos {
|
||||
ALGO_BMW,
|
||||
ALGO_BMW512,
|
||||
ALGO_C11,
|
||||
ALGO_DECRED,
|
||||
ALGO_DEEP,
|
||||
ALGO_DMD_GR,
|
||||
ALGO_GROESTL,
|
||||
@@ -572,9 +589,11 @@ enum algos {
|
||||
ALGO_QUBIT,
|
||||
ALGO_SCRYPT,
|
||||
ALGO_SHA256D,
|
||||
ALGO_SHA256DT,
|
||||
ALGO_SHA256Q,
|
||||
ALGO_SHA256T,
|
||||
ALGO_SHA3D,
|
||||
ALGO_SHA512256D,
|
||||
ALGO_SHAVITE3,
|
||||
ALGO_SKEIN,
|
||||
ALGO_SKEIN2,
|
||||
@@ -634,7 +653,6 @@ static const char* const algo_names[] = {
|
||||
"bmw",
|
||||
"bmw512",
|
||||
"c11",
|
||||
"decred",
|
||||
"deep",
|
||||
"dmd-gr",
|
||||
"groestl",
|
||||
@@ -666,9 +684,11 @@ static const char* const algo_names[] = {
|
||||
"qubit",
|
||||
"scrypt",
|
||||
"sha256d",
|
||||
"sha256dt",
|
||||
"sha256q",
|
||||
"sha256t",
|
||||
"sha3d",
|
||||
"sha512256d",
|
||||
"shavite3",
|
||||
"skein",
|
||||
"skein2",
|
||||
@@ -795,7 +815,6 @@ Options:\n\
|
||||
bmw BMW 256\n\
|
||||
bmw512 BMW 512\n\
|
||||
c11 Chaincoin\n\
|
||||
decred Blake256r14dcr\n\
|
||||
deep Deepcoin (DCN)\n\
|
||||
dmd-gr Diamond\n\
|
||||
groestl Groestl coin\n\
|
||||
@@ -829,9 +848,11 @@ Options:\n\
|
||||
scrypt:N scrypt(N, 1, 1)\n\
|
||||
scryptn2 scrypt(1048576, 1,1)\n\
|
||||
sha256d Double SHA-256\n\
|
||||
sha256dt Modified sha256d (Novo)\n\
|
||||
sha256q Quad SHA-256, Pyrite (PYE)\n\
|
||||
sha256t Triple SHA-256, Onecoin (OC)\n\
|
||||
sha3d Double Keccak256 (BSHA3)\n\
|
||||
sha512256d Double SHA-512 (Radiant)\n\
|
||||
shavite3 Shavite3\n\
|
||||
skein Skein+Sha (Skeincoin)\n\
|
||||
skein2 Double Skein (Woodcoin)\n\
|
||||
|
2371
simd-utils/intrlv.h
2371
simd-utils/intrlv.h
File diff suppressed because it is too large
Load Diff
@@ -93,10 +93,15 @@ static inline uint32_t u32_mov128_32( const __m128i a )
|
||||
return n;
|
||||
}
|
||||
|
||||
// Equivalent of set1, broadcast integer to all elements.
|
||||
#define m128_const_i128( i ) mm128_mov64_128( i )
|
||||
#define m128_const1_64( i ) _mm_shuffle_epi32( mm128_mov64_128( i ), 0x44 )
|
||||
#define m128_const1_32( i ) _mm_shuffle_epi32( mm128_mov32_128( i ), 0x00 )
|
||||
// Emulate broadcast & insert instructions not available in SSE2
|
||||
#define mm128_bcast_i64( i ) _mm_shuffle_epi32( mm128_mov64_128( i ), 0x44 )
|
||||
#define mm128_bcast_i32( i ) _mm_shuffle_epi32( mm128_mov32_128( i ), 0x00 )
|
||||
|
||||
#define m128_const_i128( i ) mm128_mov64_128( i )
|
||||
|
||||
// deprecated
|
||||
#define m128_const1_64 mm128_bcast_i64
|
||||
#define m128_const1_32 mm128_bcast_i32
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
|
||||
@@ -104,7 +109,7 @@ static inline uint32_t u32_mov128_32( const __m128i a )
|
||||
#define m128_const_64( hi, lo ) \
|
||||
_mm_insert_epi64( mm128_mov64_128( lo ), hi, 1 )
|
||||
|
||||
#else // No insert in SSE2
|
||||
#else
|
||||
|
||||
#define m128_const_64 _mm_set_epi64x
|
||||
|
||||
@@ -114,12 +119,10 @@ static inline uint32_t u32_mov128_32( const __m128i a )
|
||||
|
||||
#define m128_zero _mm_setzero_si128()
|
||||
#define m128_one_128 mm128_mov64_128( 1 )
|
||||
#define m128_one_64 _mm_shuffle_epi32( mm128_mov64_128( 1 ), 0x44 )
|
||||
#define m128_one_32 _mm_shuffle_epi32( mm128_mov32_128( 1 ), 0x00 )
|
||||
#define m128_one_16 _mm_shuffle_epi32( \
|
||||
mm128_mov32_128( 0x00010001 ), 0x00 )
|
||||
#define m128_one_8 _mm_shuffle_epi32( \
|
||||
mm128_mov32_128( 0x01010101 ), 0x00 )
|
||||
#define m128_one_64 mm128_bcast_i64( 1 )
|
||||
#define m128_one_32 mm128_bcast_i32( 1 )
|
||||
#define m128_one_16 mm128_bcast_i32( 0x00010001 )
|
||||
#define m128_one_8 mm128_bcast_i32( 0x01010101 )
|
||||
|
||||
// ASM avoids the need to initialize return variable to avoid compiler warning.
|
||||
// Macro abstracts function parentheses to look like an identifier.
|
||||
@@ -149,7 +152,7 @@ static inline __m128i mm128_neg1_fn()
|
||||
// sizing. It's unique.
|
||||
//
|
||||
// It can:
|
||||
// - zero 32 bit elements of a 128 bit vector.
|
||||
// - zero any number of 32 bit elements of a 128 bit vector.
|
||||
// - extract any 32 bit element from one 128 bit vector and insert the
|
||||
// data to any 32 bit element of another 128 bit vector, or the same vector.
|
||||
// - do both simultaneoulsly.
|
||||
@@ -162,14 +165,21 @@ static inline __m128i mm128_neg1_fn()
|
||||
// c[5:4] destination element selector
|
||||
// c[7:6] source element selector
|
||||
|
||||
// Convert type and abbreviate name: e"x"tract "i"nsert "m"ask
|
||||
// Convert type and abbreviate name: eXtract Insert Mask = XIM
|
||||
#define mm128_xim_32( v1, v2, c ) \
|
||||
_mm_castps_si128( _mm_insert_ps( _mm_castsi128_ps( v1 ), \
|
||||
_mm_castsi128_ps( v2 ), c ) )
|
||||
|
||||
// Some examples of simple operations:
|
||||
/* Another way to do it with individual arguments.
|
||||
#define mm128_xim_32( v1, i1, v2, i2, mask ) \
|
||||
_mm_castps_si128( _mm_insert_ps( _mm_castsi128_ps( v1 ), \
|
||||
_mm_castsi128_ps( v2 ), \
|
||||
(mask) | ((i1)<<4) | ((i2)<<6) ) )
|
||||
*/
|
||||
|
||||
// Insert 32 bit integer into v at element c and return modified v.
|
||||
// Examples of simple operations using xim:
|
||||
|
||||
// Insert 32 bit integer into v at element c and return updated v.
|
||||
static inline __m128i mm128_insert_32( const __m128i v, const uint32_t i,
|
||||
const int c )
|
||||
{ return mm128_xim_32( v, mm128_mov32_128( i ), c<<4 ); }
|
||||
@@ -178,13 +188,12 @@ static inline __m128i mm128_insert_32( const __m128i v, const uint32_t i,
|
||||
static inline uint32_t mm128_extract_32( const __m128i v, const int c )
|
||||
{ return u32_mov128_32( mm128_xim_32( v, v, c<<6 ) ); }
|
||||
|
||||
// Clear (zero) 32 bit elements based on bits set in 4 bit mask.
|
||||
// Zero 32 bit elements when bit in mask is set.
|
||||
static inline __m128i mm128_mask_32( const __m128i v, const int m )
|
||||
{ return mm128_xim_32( v, v, m ); }
|
||||
|
||||
// Move element i2 of v2 to element i1 of v1. For reference and convenience,
|
||||
// it's faster to precalculate the index.
|
||||
#define mm128_shuflmov_32( v1, i1, v2, i2 ) \
|
||||
// Move element i2 of v2 to element i1 of v1 and return updated v1.
|
||||
#define mm128_mov32_32( v1, i1, v2, i2 ) \
|
||||
mm128_xim_32( v1, v2, ( (i1)<<4 ) | ( (i2)<<6 ) )
|
||||
|
||||
#endif // SSE4_1
|
||||
@@ -280,7 +289,7 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
||||
|
||||
// Mask making
|
||||
// Equivalent of AVX512 _mm_movepi64_mask & _mm_movepi32_mask.
|
||||
// Returns 2 or 4 bit integer mask from MSB of 64 or 32 bit elements.
|
||||
// Returns 2 or 4 bit integer mask from MSBit of 64 or 32 bit elements.
|
||||
// Effectively a sign test.
|
||||
|
||||
#define mm_movmask_64( v ) \
|
||||
@@ -385,7 +394,8 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
||||
#define mm128_rol_var_32( v, c ) \
|
||||
_mm_or_si128( _mm_slli_epi32( v, c ), _mm_srli_epi32( v, 32-(c) ) )
|
||||
|
||||
/* Not used
|
||||
// Cross lane shuffles
|
||||
//
|
||||
// Limited 2 input shuffle, combines shuffle with blend. The destination low
|
||||
// half is always taken from v1, and the high half from v2.
|
||||
#define mm128_shuffle2_64( v1, v2, c ) \
|
||||
@@ -395,19 +405,16 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
||||
#define mm128_shuffle2_32( v1, v2, c ) \
|
||||
_mm_castps_si128( _mm_shuffle_ps( _mm_castsi128_ps( v1 ), \
|
||||
_mm_castsi128_ps( v2 ), c ) );
|
||||
*/
|
||||
|
||||
//
|
||||
// Rotate vector elements accross all lanes
|
||||
|
||||
#define mm128_swap_64( v ) _mm_shuffle_epi32( v, 0x4e )
|
||||
#define mm128_shuflr_64 mm128_swap_64
|
||||
#define mm128_shufll_64 mm128_swap_64
|
||||
#define mm128_swap_64( v ) _mm_shuffle_epi32( v, 0x4e )
|
||||
#define mm128_shuflr_64 mm128_swap_64
|
||||
#define mm128_shufll_64 mm128_swap_64
|
||||
|
||||
#define mm128_shuflr_32( v ) _mm_shuffle_epi32( v, 0x39 )
|
||||
#define mm128_shufll_32( v ) _mm_shuffle_epi32( v, 0x93 )
|
||||
|
||||
/* Not used
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
// Rotate right by c bytes, no SSE2 equivalent.
|
||||
@@ -415,15 +422,12 @@ static inline __m128i mm128_shuflr_x8( const __m128i v, const int c )
|
||||
{ return _mm_alignr_epi8( v, v, c ); }
|
||||
|
||||
#endif
|
||||
*/
|
||||
|
||||
// Rotate byte elements within 64 or 32 bit lanes, AKA optimized bit rotations
|
||||
// for multiples of 8 bits. Uses ror/rol macros when AVX512 is available
|
||||
// (unlikely but faster), or when SSSE3 is not available (slower).
|
||||
// Rotate 64 bit lanes
|
||||
|
||||
#define mm128_swap64_32( v ) _mm_shuffle_epi32( v, 0xb1 )
|
||||
#define mm128_shuflr64_32 mm128_swap64_32
|
||||
#define mm128_shufll64_32 mm128_swap64_32
|
||||
#define mm128_shuflr64_32 mm128_swap64_32
|
||||
#define mm128_shufll64_32 mm128_swap64_32
|
||||
|
||||
#if defined(__SSSE3__) && !defined(__AVX512VL__)
|
||||
#define mm128_shuflr64_24( v ) \
|
||||
@@ -441,6 +445,8 @@ static inline __m128i mm128_shuflr_x8( const __m128i v, const int c )
|
||||
#define mm128_shuflr64_16( v ) mm128_ror_64( v, 16 )
|
||||
#endif
|
||||
|
||||
// Rotate 32 bit lanes
|
||||
|
||||
#if defined(__SSSE3__) && !defined(__AVX512VL__)
|
||||
#define mm128_swap32_16( v ) \
|
||||
_mm_shuffle_epi8( v, _mm_set_epi64x( \
|
||||
@@ -448,8 +454,8 @@ static inline __m128i mm128_shuflr_x8( const __m128i v, const int c )
|
||||
#else
|
||||
#define mm128_swap32_16( v ) mm128_ror_32( v, 16 )
|
||||
#endif
|
||||
#define mm128_shuflr32_16 mm128_swap32_16
|
||||
#define mm128_shufll32_16 mm128_swap32_16
|
||||
#define mm128_shuflr32_16 mm128_swap32_16
|
||||
#define mm128_shufll32_16 mm128_swap32_16
|
||||
|
||||
#if defined(__SSSE3__) && !defined(__AVX512VL__)
|
||||
#define mm128_shuflr32_8( v ) \
|
||||
@@ -464,6 +470,10 @@ static inline __m128i mm128_shuflr_x8( const __m128i v, const int c )
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
#define mm128_bswap_128( v ) \
|
||||
_mm_shuffle_epi8( v, m128_const_64( 0x0001020304050607, \
|
||||
0x08090a0b0c0d0e0f ) )
|
||||
|
||||
#define mm128_bswap_64( v ) \
|
||||
_mm_shuffle_epi8( v, m128_const_64( 0x08090a0b0c0d0e0f, \
|
||||
0x0001020304050607 ) )
|
||||
@@ -525,6 +535,9 @@ static inline __m128i mm128_bswap_16( __m128i v )
|
||||
return _mm_or_si128( _mm_slli_epi16( v, 8 ), _mm_srli_epi16( v, 8 ) );
|
||||
}
|
||||
|
||||
#define mm128_bswap_128( v ) \
|
||||
mm128_swap_64( mm128_bswap_64( v ) )
|
||||
|
||||
static inline void mm128_block_bswap_64( __m128i *d, const __m128i *s )
|
||||
{
|
||||
d[0] = mm128_bswap_64( s[0] );
|
||||
@@ -558,12 +571,11 @@ static inline void mm128_block_bswap_32( __m128i *d, const __m128i *s )
|
||||
v2 = _mm_xor_si128( v1, v2 ); \
|
||||
v1 = _mm_xor_si128( v1, v2 );
|
||||
|
||||
// Concatenate { hi, lo }, rotate right by c elements and return low 128 bits.
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
// _mm_alignr_epi32 & _mm_alignr_epi64 are only available with AVX512VL but
|
||||
// are emulated here using _mm_alignr_epi8. There are no fast equivalents for
|
||||
// 256 bit vectors, though there is no for this functionality.
|
||||
// alignr instruction for 32 & 64 bit elements is only available with AVX512
|
||||
// but emulated here. Behaviour is consistent with Intel alignr intrinsics.
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
#define mm128_alignr_64( hi, lo, c ) _mm_alignr_epi8( hi, lo, (c)*8 )
|
||||
#define mm128_alignr_32( hi, lo, c ) _mm_alignr_epi8( hi, lo, (c)*4 )
|
||||
|
@@ -15,6 +15,8 @@
|
||||
//
|
||||
// "_mm256_shuffle_epi8" and "_mm256_alignr_epi8" are restricted to 128 bit
|
||||
// lanes and data can't cross the 128 bit lane boundary.
|
||||
// Full width byte shuffle is available with AVX512VL using the mask version
|
||||
// with a full mask (-1).
|
||||
// Instructions that can move data across 128 bit lane boundary incur a
|
||||
// performance penalty over those that can't.
|
||||
// Some usage of index vectors may be encoded as if full vector shuffles are
|
||||
@@ -66,31 +68,33 @@ typedef union
|
||||
#define u32_mov256_32( v ) u32_mov128_32( _mm256_castsi256_si128( v ) )
|
||||
|
||||
// concatenate two 128 bit vectors into one 256 bit vector: { hi, lo }
|
||||
|
||||
#define mm256_concat_128( hi, lo ) \
|
||||
_mm256_inserti128_si256( _mm256_castsi128_si256( lo ), hi, 1 )
|
||||
|
||||
#define mm256_bcast_m128( v ) \
|
||||
_mm256_permute4x64_epi64( _mm256_castsi128_si256( v ), 0x44 )
|
||||
#define mm256_bcast_i128( i ) mm256_bcast_m128( mm128_mov64_128( i ) )
|
||||
#define mm256_bcast_i64( i ) _mm256_broadcastq_epi64( mm128_mov64_128( i ) )
|
||||
#define mm256_bcast_i32( i ) _mm256_broadcastd_epi32( mm128_mov32_128( i ) )
|
||||
#define mm256_bcast_i16( i ) _mm256_broadcastw_epi16( mm128_mov32_128( i ) )
|
||||
#define mm256_bcast_i8( i ) _mm256_broadcastb_epi8 ( mm128_mov32_128( i ) )
|
||||
|
||||
// Equivalent of set, move 64 bit integer constants to respective 64 bit
|
||||
// elements.
|
||||
static inline __m256i m256_const_64( const uint64_t i3, const uint64_t i2,
|
||||
const uint64_t i1, const uint64_t i0 )
|
||||
{
|
||||
union { __m256i m256i;
|
||||
uint64_t u64[4]; } v;
|
||||
union { __m256i m256i; uint64_t u64[4]; } v;
|
||||
v.u64[0] = i0; v.u64[1] = i1; v.u64[2] = i2; v.u64[3] = i3;
|
||||
return v.m256i;
|
||||
}
|
||||
|
||||
// Equivalent of set1.
|
||||
// 128 bit vector argument
|
||||
#define m256_const1_128( v ) \
|
||||
_mm256_permute4x64_epi64( _mm256_castsi128_si256( v ), 0x44 )
|
||||
// 64 bit integer argument zero extended to 128 bits.
|
||||
#define m256_const1_i128( i ) m256_const1_128( mm128_mov64_128( i ) )
|
||||
#define m256_const1_64( i ) _mm256_broadcastq_epi64( mm128_mov64_128( i ) )
|
||||
#define m256_const1_32( i ) _mm256_broadcastd_epi32( mm128_mov32_128( i ) )
|
||||
#define m256_const1_16( i ) _mm256_broadcastw_epi16( mm128_mov32_128( i ) )
|
||||
#define m256_const1_8 ( i ) _mm256_broadcastb_epi8 ( mm128_mov32_128( i ) )
|
||||
// Deprecated
|
||||
#define m256_const1_128 mm256_bcast_m128
|
||||
#define m256_const1_i128 mm256_bcast_i128
|
||||
#define m256_const1_64 mm256_bcast_i64
|
||||
#define m256_const1_32 mm256_bcast_i32
|
||||
|
||||
#define m256_const2_64( i1, i0 ) \
|
||||
m256_const1_128( m128_const_64( i1, i0 ) )
|
||||
@@ -99,13 +103,13 @@ static inline __m256i m256_const_64( const uint64_t i3, const uint64_t i2,
|
||||
// All SIMD constant macros are actually functions containing executable
|
||||
// code and therefore can't be used as compile time initializers.
|
||||
|
||||
#define m256_zero _mm256_setzero_si256()
|
||||
#define m256_one_256 mm256_mov64_256( 1 )
|
||||
#define m256_one_128 m256_const1_i128( 1 )
|
||||
#define m256_one_64 _mm256_broadcastq_epi64( mm128_mov64_128( 1 ) )
|
||||
#define m256_one_32 _mm256_broadcastd_epi32( mm128_mov64_128( 1 ) )
|
||||
#define m256_one_16 _mm256_broadcastw_epi16( mm128_mov64_128( 1 ) )
|
||||
#define m256_one_8 _mm256_broadcastb_epi8 ( mm128_mov64_128( 1 ) )
|
||||
#define m256_zero _mm256_setzero_si256()
|
||||
#define m256_one_256 mm256_mov64_256( 1 )
|
||||
#define m256_one_128 mm256_bcast_i128( 1 )
|
||||
#define m256_one_64 mm256_bcast_i64( 1 )
|
||||
#define m256_one_32 mm256_bcast_i32( 1 )
|
||||
#define m256_one_16 mm256_bcast_i16( 1 )
|
||||
#define m256_one_8 mm256_bcast_i8 ( 1 )
|
||||
|
||||
static inline __m256i mm256_neg1_fn()
|
||||
{
|
||||
@@ -116,8 +120,8 @@ static inline __m256i mm256_neg1_fn()
|
||||
#define m256_neg1 mm256_neg1_fn()
|
||||
|
||||
// Consistent naming for similar operations.
|
||||
#define mm128_extr_lo128_256( v ) _mm256_castsi256_si128( v )
|
||||
#define mm128_extr_hi128_256( v ) _mm256_extracti128_si256( v, 1 )
|
||||
#define mm128_extr_lo128_256( v ) _mm256_castsi256_si128( v )
|
||||
#define mm128_extr_hi128_256( v ) _mm256_extracti128_si256( v, 1 )
|
||||
|
||||
//
|
||||
// Memory functions
|
||||
@@ -239,8 +243,8 @@ static inline __m256i mm256_not( const __m256i v )
|
||||
|
||||
// Mask making
|
||||
// Equivalent of AVX512 _mm256_movepi64_mask & _mm256_movepi32_mask.
|
||||
// Create a 64 or 32 bit integer mask from MSB of 64 or 32 bit elements.
|
||||
// Effectively a sign test: if (mask[n]) then -1 else 0.
|
||||
// Returns 4 or 8 bit integer mask from MSBit of 64 or 32 bit elements.
|
||||
// Effectively a sign test.
|
||||
|
||||
#define mm256_movmask_64( v ) \
|
||||
_mm256_castpd_si256( _mm256_movmask_pd( _mm256_castsi256_pd( v ) ) )
|
||||
@@ -348,23 +352,27 @@ static inline __m256i mm256_not( const __m256i v )
|
||||
_mm256_or_si256( _mm256_slli_epi16( v, c ), \
|
||||
_mm256_srli_epi16( v, 16-(c) ) )
|
||||
|
||||
// Deprecated. Obsolete sm3, the only user, is grandfathered.
|
||||
// Deprecated.
|
||||
#define mm256_rol_var_32( v, c ) \
|
||||
_mm256_or_si256( _mm256_slli_epi32( v, c ), \
|
||||
_mm256_srli_epi32( v, 32-(c) ) )
|
||||
|
||||
//
|
||||
// Cross lane shuffles
|
||||
//
|
||||
// Rotate elements accross all lanes.
|
||||
|
||||
// Swap 128 bit elements in 256 bit vector.
|
||||
#define mm256_swap_128( v ) _mm256_permute4x64_epi64( v, 0x4e )
|
||||
#define mm256_shuflr_128 mm256_swap_128
|
||||
#define mm256_shufll_128 mm256_swap_128
|
||||
#define mm256_shuflr_128 mm256_swap_128
|
||||
#define mm256_shufll_128 mm256_swap_128
|
||||
|
||||
// Rotate 256 bit vector by one 64 bit element
|
||||
#define mm256_shuflr_64( v ) _mm256_permute4x64_epi64( v, 0x39 )
|
||||
#define mm256_shufll_64( v ) _mm256_permute4x64_epi64( v, 0x93 )
|
||||
|
||||
|
||||
/* Not used
|
||||
// Rotate 256 bit vector by one 32 bit element.
|
||||
#if defined(__AVX512VL__)
|
||||
|
||||
@@ -387,11 +395,11 @@ static inline __m256i mm256_shufll_32( const __m256i v )
|
||||
0x0000000200000001, 0x0000000000000007 ) )
|
||||
|
||||
#endif
|
||||
*/
|
||||
|
||||
//
|
||||
// Rotate elements within each 128 bit lane of 256 bit vector.
|
||||
|
||||
/* Not used
|
||||
// Limited 2 input shuffle
|
||||
#define mm256_shuffle2_64( v1, v2, c ) \
|
||||
_mm256_castpd_si256( _mm256_shuffle_pd( _mm256_castsi256_pd( v1 ), \
|
||||
@@ -400,7 +408,6 @@ static inline __m256i mm256_shufll_32( const __m256i v )
|
||||
#define mm256_shuffle2_32( v1, v2, c ) \
|
||||
_mm256_castps_si256( _mm256_shuffle_ps( _mm256_castsi256_ps( v1 ), \
|
||||
_mm256_castsi256_ps( v2 ), c ) );
|
||||
*/
|
||||
|
||||
#define mm256_swap128_64( v ) _mm256_shuffle_epi32( v, 0x4e )
|
||||
#define mm256_shuflr128_64 mm256_swap128_64
|
||||
@@ -412,20 +419,17 @@ static inline __m256i mm256_shufll_32( const __m256i v )
|
||||
static inline __m256i mm256_shuflr128_x8( const __m256i v, const int c )
|
||||
{ return _mm256_alignr_epi8( v, v, c ); }
|
||||
|
||||
// Rotate byte elements within 64 or 32 bit lanes, AKA optimized bit
|
||||
// rotations for multiples of 8 bits. Uses faster ror/rol instructions when
|
||||
// AVX512 is available.
|
||||
// 64 bit lanes
|
||||
|
||||
#define mm256_swap64_32( v ) _mm256_shuffle_epi32( v, 0xb1 )
|
||||
#define mm256_shuflr64_32 mm256_swap64_32
|
||||
#define mm256_shufll64_32 mm256_swap64_32
|
||||
#define mm256_swap64_32( v ) _mm256_shuffle_epi32( v, 0xb1 )
|
||||
#define mm256_shuflr64_32 mm256_swap64_32
|
||||
#define mm256_shufll64_32 mm256_swap64_32
|
||||
|
||||
#if defined(__AVX512VL__)
|
||||
#define mm256_shuflr64_24( v ) _mm256_ror_epi64( v, 24 )
|
||||
#else
|
||||
#define mm256_shuflr64_24( v ) \
|
||||
_mm256_shuffle_epi8( v, _mm256_set_epi64x( \
|
||||
0x0a09080f0e0d0c0b, 0x0201000706050403, \
|
||||
_mm256_shuffle_epi8( v, m256_const2_64( \
|
||||
0x0a09080f0e0d0c0b, 0x0201000706050403 ) )
|
||||
#endif
|
||||
|
||||
@@ -433,21 +437,21 @@ static inline __m256i mm256_shuflr128_x8( const __m256i v, const int c )
|
||||
#define mm256_shuflr64_16( v ) _mm256_ror_epi64( v, 16 )
|
||||
#else
|
||||
#define mm256_shuflr64_16( v ) \
|
||||
_mm256_shuffle_epi8( v, _mm256_set_epi64x( \
|
||||
0x09080f0e0d0c0b0a, 0x0100070605040302, \
|
||||
_mm256_shuffle_epi8( v, m256_const2_64( \
|
||||
0x09080f0e0d0c0b0a, 0x0100070605040302 ) )
|
||||
#endif
|
||||
|
||||
// 32 bit lanes
|
||||
|
||||
#if defined(__AVX512VL__)
|
||||
#define mm256_swap32_16( v ) _mm256_ror_epi32( v, 16 )
|
||||
#else
|
||||
#define mm256_swap32_16( v ) \
|
||||
_mm256_shuffle_epi8( v, _mm256_set_epi64x( \
|
||||
0x0d0c0f0e09080b0a, 0x0504070601000302, \
|
||||
_mm256_shuffle_epi8( v, m256_const2_64( \
|
||||
0x0d0c0f0e09080b0a, 0x0504070601000302 ) )
|
||||
#endif
|
||||
#define mm256_shuflr32_16 mm256_swap32_16
|
||||
#define mm256_shufll32_16 mm256_swap32_16
|
||||
#define mm256_shuflr32_16 mm256_swap32_16
|
||||
#define mm256_shufll32_16 mm256_swap32_16
|
||||
|
||||
#if defined(__AVX512VL__)
|
||||
#define mm256_shuflr32_8( v ) _mm256_ror_epi32( v, 8 )
|
||||
@@ -458,35 +462,24 @@ static inline __m256i mm256_shuflr128_x8( const __m256i v, const int c )
|
||||
0x0c0f0e0d080b0a09, 0x0407060500030201 ) )
|
||||
#endif
|
||||
|
||||
// NOTE: _mm256_shuffle_epi8, like most shuffles, is restricted to 128 bit
|
||||
// lanes. AVX512, however, supports full vector 8 bit shuffle. The AVX512VL +
|
||||
// AVX512BW intrinsic _mm256_mask_shuffle_epi8 with a NULL mask, can be used if
|
||||
// needed for a shuffle that crosses 128 bit lanes. BSWAP doesn't therefore the
|
||||
// AVX2 version will work here. The bswap control vector is coded to work
|
||||
// with both versions, bit 4 is ignored in AVX2.
|
||||
|
||||
// Reverse byte order in elements, endian bswap.
|
||||
#define mm256_bswap_64( v ) \
|
||||
_mm256_shuffle_epi8( v, \
|
||||
m256_const_64( 0x18191a1b1c1d1e1f, 0x1011121314151617, \
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ) )
|
||||
m256_const2_64( 0x08090a0b0c0d0e0f, 0x0001020304050607 ) )
|
||||
|
||||
#define mm256_bswap_32( v ) \
|
||||
_mm256_shuffle_epi8( v, \
|
||||
m256_const_64( 0x1c1d1e1f18191a1b, 0x1415161710111213, \
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ) )
|
||||
m256_const2_64( 0x0c0d0e0f08090a0b, 0x0405060700010203 ) )
|
||||
|
||||
#define mm256_bswap_16( v ) \
|
||||
_mm256_shuffle_epi8( v, \
|
||||
m256_const_64( 0x1e1f1c1d1a1b1819, 0x1617141512131011, \
|
||||
0x0e0f0c0d0a0b0809, 0x0607040502030001, ) )
|
||||
m256_const2_64( 0x0e0f0c0d0a0b0809, 0x0607040502030001, ) )
|
||||
|
||||
// Source and destination are pointers, may point to same memory.
|
||||
// 8 byte qword * 8 qwords * 4 lanes = 256 bytes
|
||||
#define mm256_block_bswap_64( d, s ) do \
|
||||
{ \
|
||||
__m256i ctl = m256_const_64( 0x18191a1b1c1d1e1f, 0x1011121314151617, \
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ) ; \
|
||||
__m256i ctl = m256_const2_64( 0x08090a0b0c0d0e0f, 0x0001020304050607 ) ; \
|
||||
casti_m256i( d, 0 ) = _mm256_shuffle_epi8( casti_m256i( s, 0 ), ctl ); \
|
||||
casti_m256i( d, 1 ) = _mm256_shuffle_epi8( casti_m256i( s, 1 ), ctl ); \
|
||||
casti_m256i( d, 2 ) = _mm256_shuffle_epi8( casti_m256i( s, 2 ), ctl ); \
|
||||
@@ -500,8 +493,7 @@ static inline __m256i mm256_shuflr128_x8( const __m256i v, const int c )
|
||||
// 4 byte dword * 8 dwords * 8 lanes = 256 bytes
|
||||
#define mm256_block_bswap_32( d, s ) do \
|
||||
{ \
|
||||
__m256i ctl = m256_const_64( 0x1c1d1e1f18191a1b, 0x1415161710111213, \
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ); \
|
||||
__m256i ctl = m256_const2_64( 0x0c0d0e0f08090a0b, 0x0405060700010203 ); \
|
||||
casti_m256i( d, 0 ) = _mm256_shuffle_epi8( casti_m256i( s, 0 ), ctl ); \
|
||||
casti_m256i( d, 1 ) = _mm256_shuffle_epi8( casti_m256i( s, 1 ), ctl ); \
|
||||
casti_m256i( d, 2 ) = _mm256_shuffle_epi8( casti_m256i( s, 2 ), ctl ); \
|
||||
@@ -513,8 +505,7 @@ static inline __m256i mm256_shuflr128_x8( const __m256i v, const int c )
|
||||
} while(0)
|
||||
|
||||
// swap 256 bit vectors in place.
|
||||
// Deprecated, Shabal is the only user and it should be modified to reorder
|
||||
// instructions.
|
||||
// This should be avoided, it's more efficient to switch references.
|
||||
#define mm256_swap512_256( v1, v2 ) \
|
||||
v1 = _mm256_xor_si256( v1, v2 ); \
|
||||
v2 = _mm256_xor_si256( v1, v2 ); \
|
||||
|
@@ -113,7 +113,17 @@ static inline __m512i mm512_perm_128( const __m512i v, const int c )
|
||||
#define mm512_concat_256( hi, lo ) \
|
||||
_mm512_inserti64x4( _mm512_castsi256_si512( lo ), hi, 1 )
|
||||
|
||||
#define m512_const_128( v3, v2, v1, v0 ) \
|
||||
// Work in progress.
|
||||
// modified naming scheme to align more with opcode mnenonic:
|
||||
// m512_const1 becomes mm512_bcast_m[n] or mm512_bcast_i[n], short for
|
||||
// broadcast, i indicates integer arg, m is vector. Set1 intrinsics should
|
||||
// genarally be used for integer data.
|
||||
// mm512_const should only be used with immediate integer arguments, use
|
||||
// _mm512_set intrinsic instead.
|
||||
// mm512_set, mm512_set[n] macros may be defined when no intrinsic exists
|
||||
// for either the arg size or arg count.
|
||||
|
||||
#define mm512_set_128( v3, v2, v1, v0 ) \
|
||||
mm512_concat_256( mm256_concat_128( v3, v2 ), \
|
||||
mm256_concat_128( v1, v0 ) )
|
||||
|
||||
@@ -133,29 +143,35 @@ static inline __m512i m512_const_64( const uint64_t i7, const uint64_t i6,
|
||||
return v.m512i;
|
||||
}
|
||||
|
||||
// Broadcast with vector argument is generally more efficient except for
|
||||
// integer immediate constants or when data was most recently referenced as
|
||||
// integer and is still available in an integer register.
|
||||
|
||||
/* not used
|
||||
// Equivalent of set1, broadcast lo element to all elements.
|
||||
static inline __m512i m512_const1_256( const __m256i v )
|
||||
{ return _mm512_inserti64x4( _mm512_castsi256_si512( v ), v, 1 ); }
|
||||
*/
|
||||
|
||||
#define m512_const1_128( v ) \
|
||||
mm512_perm_128( _mm512_castsi128_si512( v ), 0 )
|
||||
// Integer input argument up to 64 bits
|
||||
#define m512_const1_i128( i ) \
|
||||
mm512_perm_128( _mm512_castsi128_si512( mm128_mov64_128( i ) ), 0 )
|
||||
#define mm512_bcast_m128( v ) mm512_perm_128( _mm512_castsi128_si512( v ), 0 )
|
||||
// Low 64 bits only, high 64 bits are zeroed.
|
||||
#define mm512_bcast_i128( i ) mm512_bcast_m128( mm128_mov64_128( i ) )
|
||||
#define mm512_bcast_i64( i ) _mm512_broadcastq_epi64( mm128_mov64_128( i ) )
|
||||
#define mm512_bcast_i32( i ) _mm512_broadcastd_epi32( mm128_mov32_128( i ) )
|
||||
#define mm512_bcast_i16( i ) _mm512_broadcastw_epi16( mm128_mov32_128( i ) )
|
||||
#define mm512_bcast_i8( i ) _mm512_broadcastb_epi8( mm128_mov32_128( i ) )
|
||||
|
||||
//#define m512_const1_256( v ) _mm512_broadcast_i64x4( v )
|
||||
//#define m512_const1_128( v ) _mm512_broadcast_i64x2( v )
|
||||
#define m512_const1_64( i ) _mm512_broadcastq_epi64( mm128_mov64_128( i ) )
|
||||
#define m512_const1_32( i ) _mm512_broadcastd_epi32( mm128_mov32_128( i ) )
|
||||
#define m512_const1_16( i ) _mm512_broadcastw_epi16( mm128_mov32_128( i ) )
|
||||
#define m512_const1_8( i ) _mm512_broadcastb_epi8 ( mm128_mov32_128( i ) )
|
||||
// const1 is deprecated, use bcast instead
|
||||
#define m512_const1_128 mm512_bcast_m128
|
||||
#define m512_const1_i128 mm512_bcast_i128
|
||||
#define m512_const1_64 mm512_bcast_i64
|
||||
#define m512_const1_32 mm512_bcast_i32
|
||||
|
||||
#define m512_const2_128( v1, v0 ) \
|
||||
m512_const1_256( _mm512_inserti64x2( _mm512_castsi128_si512( v0 ), v1, 1 ) )
|
||||
_mm512_inserti64x2( _mm512_castsi128_si512( v0 ), v1, 1 )
|
||||
|
||||
#define m512_const2_64( i1, i0 ) \
|
||||
m512_const1_128( m128_const_64( i1, i0 ) )
|
||||
|
||||
mm512_bcast_m128( m128_const_64( i1, i0 ) )
|
||||
|
||||
static inline __m512i m512_const4_64( const uint64_t i3, const uint64_t i2,
|
||||
const uint64_t i1, const uint64_t i0 )
|
||||
@@ -179,11 +195,11 @@ static inline __m512i m512_const4_64( const uint64_t i3, const uint64_t i2,
|
||||
#define m512_zero _mm512_setzero_si512()
|
||||
#define m512_one_512 mm512_mov64_512( 1 )
|
||||
#define m512_one_256 _mm512_inserti64x4( m512_one_512, m256_one_256, 1 )
|
||||
#define m512_one_128 m512_const1_i128( 1 )
|
||||
#define m512_one_64 m512_const1_64( 1 )
|
||||
#define m512_one_32 m512_const1_32( 1 )
|
||||
#define m512_one_16 m512_const1_16( 1 )
|
||||
#define m512_one_8 m512_const1_8( 1 )
|
||||
#define m512_one_128 mm512_bcast_i128( (__uint128_t)1 )
|
||||
#define m512_one_64 mm512_bcast_i64( (uint64_t)1 )
|
||||
#define m512_one_32 mm512_bcast_i32( (uint32_t)1 )
|
||||
#define m512_one_16 mm512_bcast_i16( (uint16_t)1 )
|
||||
#define m512_one_8 mm512_bcast_i8( (uint8_t)1 )
|
||||
|
||||
// use asm to avoid compiler warning for unitialized local
|
||||
static inline __m512i mm512_neg1_fn()
|
||||
@@ -193,8 +209,6 @@ static inline __m512i mm512_neg1_fn()
|
||||
return a;
|
||||
}
|
||||
#define m512_neg1 mm512_neg1_fn() // 1 clock
|
||||
//#define m512_neg1 m512_const1_64( 0xffffffffffffffff ) // 5 clocks
|
||||
//#define m512_neg1 _mm512_movm_epi64( 0xff ) // 2 clocks
|
||||
|
||||
//
|
||||
// Basic operations without SIMD equivalent
|
||||
@@ -343,10 +357,10 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
|
||||
// 8 lanes of 64 bytes each
|
||||
#define mm512_block_bswap_64( d, s ) do \
|
||||
{ \
|
||||
__m512i ctl = m512_const_64( 0x38393a3b3c3d3e3f, 0x3031323334353637, \
|
||||
0x28292a2b2c2d2e2f, 0x2021222324252627, \
|
||||
0x18191a1b1c1d1e1f, 0x1011121314151617, \
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ); \
|
||||
const __m512i ctl = m512_const_64( 0x38393a3b3c3d3e3f, 0x3031323334353637, \
|
||||
0x28292a2b2c2d2e2f, 0x2021222324252627, \
|
||||
0x18191a1b1c1d1e1f, 0x1011121314151617, \
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ); \
|
||||
casti_m512i( d, 0 ) = _mm512_shuffle_epi8( casti_m512i( s, 0 ), ctl ); \
|
||||
casti_m512i( d, 1 ) = _mm512_shuffle_epi8( casti_m512i( s, 1 ), ctl ); \
|
||||
casti_m512i( d, 2 ) = _mm512_shuffle_epi8( casti_m512i( s, 2 ), ctl ); \
|
||||
@@ -360,10 +374,10 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
|
||||
// 16 lanes of 32 bytes each
|
||||
#define mm512_block_bswap_32( d, s ) do \
|
||||
{ \
|
||||
__m512i ctl = m512_const_64( 0x3c3d3e3f38393a3b, 0x3435363730313233, \
|
||||
0x2c2d2e2f28292a2b, 0x2425262720212223, \
|
||||
0x1c1d1e1f18191a1b, 0x1415161710111213, \
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ); \
|
||||
const __m512i ctl = m512_const_64( 0x3c3d3e3f38393a3b, 0x3435363730313233, \
|
||||
0x2c2d2e2f28292a2b, 0x2425262720212223, \
|
||||
0x1c1d1e1f18191a1b, 0x1415161710111213, \
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ); \
|
||||
casti_m512i( d, 0 ) = _mm512_shuffle_epi8( casti_m512i( s, 0 ), ctl ); \
|
||||
casti_m512i( d, 1 ) = _mm512_shuffle_epi8( casti_m512i( s, 1 ), ctl ); \
|
||||
casti_m512i( d, 2 ) = _mm512_shuffle_epi8( casti_m512i( s, 2 ), ctl ); \
|
||||
@@ -409,7 +423,6 @@ static inline __m512i mm512_shuflr_x64( const __m512i v, const int n )
|
||||
static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||
{ return _mm512_alignr_epi32( v, v, n ); }
|
||||
|
||||
/* Not used
|
||||
#define mm512_shuflr_16( v ) \
|
||||
_mm512_permutexvar_epi16( m512_const_64( \
|
||||
0x0000001F001E001D, 0x001C001B001A0019, \
|
||||
@@ -437,7 +450,6 @@ static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||
0x2E2D2C2B2A292827, 0x262524232221201F, \
|
||||
0x1E1D1C1B1A191817, 0x161514131211100F, \
|
||||
0x0E0D0C0B0A090807, 0x060504030201003F ) )
|
||||
*/
|
||||
|
||||
// 256 bit lanes used only by lyra2, move these there
|
||||
// Rotate elements within 256 bit lanes of 512 bit vector.
|
||||
@@ -451,7 +463,7 @@ static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||
#define mm512_shuflr256_64( v ) _mm512_permutex_epi64( v, 0x39 )
|
||||
#define mm512_shufll256_64( v ) _mm512_permutex_epi64( v, 0x93 )
|
||||
|
||||
/* Not used
|
||||
/* Not used
|
||||
// Rotate 256 bit lanes by one 32 bit element
|
||||
#define mm512_shuflr256_32( v ) \
|
||||
_mm512_permutexvar_epi32( m512_const_64( \
|
||||
@@ -498,7 +510,18 @@ static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||
//
|
||||
// Shuffle/rotate elements within 128 bit lanes of 512 bit vector.
|
||||
|
||||
/* Not used
|
||||
#define mm512_swap128_64( v ) _mm512_shuffle_epi32( v, 0x4e )
|
||||
#define mm512_shuflr128_64 mm512_swap128_64
|
||||
#define mm512_shufll128_64 mm512_swap128_64
|
||||
|
||||
// Rotate 128 bit lanes by one 32 bit element
|
||||
#define mm512_shuflr128_32( v ) _mm512_shuffle_epi32( v, 0x39 )
|
||||
#define mm512_shufll128_32( v ) _mm512_shuffle_epi32( v, 0x93 )
|
||||
|
||||
// Rotate 128 bit lanes right by c bytes, versatile and just as fast
|
||||
static inline __m512i mm512_shuflr128_8( const __m512i v, const int c )
|
||||
{ return _mm512_alignr_epi8( v, v, c ); }
|
||||
|
||||
// Limited 2 input, 1 output shuffle, combines shuffle with blend.
|
||||
// Like most shuffles it's limited to 128 bit lanes and like some shuffles
|
||||
// destination elements must come from a specific source arg.
|
||||
@@ -509,32 +532,12 @@ static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||
#define mm512_shuffle2_32( v1, v2, c ) \
|
||||
_mm512_castps_si512( _mm512_shuffle_ps( _mm512_castsi512_ps( v1 ), \
|
||||
_mm512_castsi512_ps( v2 ), c ) );
|
||||
*/
|
||||
|
||||
// These hard coded shuffles exist for consistency with AVX2 & SSE2 where
|
||||
// efficient generic versions don't exist.
|
||||
// Swap 64 bits in each 128 bit lane
|
||||
#define mm512_swap128_64( v ) _mm512_shuffle_epi32( v, 0x4e )
|
||||
#define mm512_shuflr128_64 mm512_swap128_64
|
||||
#define mm512_shufll128_64 mm512_swap128_64
|
||||
|
||||
// Rotate 128 bit lanes by one 32 bit element
|
||||
#define mm512_shuflr128_32( v ) _mm512_shuffle_epi32( v, 0x39 )
|
||||
#define mm512_shufll128_32( v ) _mm512_shuffle_epi32( v, 0x93 )
|
||||
|
||||
/* Not used
|
||||
// Rotate right 128 bit lanes by c bytes, efficient generic version of above.
|
||||
static inline __m512i mm512_shuflr128_8( const __m512i v, const int c )
|
||||
{ return _mm512_alignr_epi8( v, v, c ); }
|
||||
*/
|
||||
|
||||
// Rotate byte elements in each 64 or 32 bit lane. Redundant for AVX512, all
|
||||
// can be done with ror & rol. Defined only for convenience and consistency
|
||||
// with AVX2 & SSE2 macros.
|
||||
// 64 bit lanes
|
||||
|
||||
#define mm512_swap64_32( v ) _mm512_shuffle_epi32( v, 0xb1 )
|
||||
#define mm512_shuflr64_32 mm512_swap64_32
|
||||
#define mm512_shufll64_32 mm512_swap64_32
|
||||
#define mm512_shuflr64_32 mm512_swap64_32
|
||||
#define mm512_shufll64_32 mm512_swap64_32
|
||||
|
||||
#define mm512_shuflr64_24( v ) _mm512_ror_epi64( v, 24 )
|
||||
#define mm512_shufll64_24( v ) _mm512_rol_epi64( v, 24 )
|
||||
@@ -545,12 +548,14 @@ static inline __m512i mm512_shuflr128_8( const __m512i v, const int c )
|
||||
#define mm512_shuflr64_8( v ) _mm512_ror_epi64( v, 8 )
|
||||
#define mm512_shufll64_8( v ) _mm512_rol_epi64( v, 8 )
|
||||
|
||||
#define mm512_swap32_16( v ) _mm512_ror_epi32( v, 16 )
|
||||
#define mm512_shuflr32_16 mm512_swap32_16
|
||||
#define mm512_shufll32_16 mm512_swap32_16
|
||||
// 32 bit lanes
|
||||
|
||||
#define mm512_shuflr32_8( v ) _mm512_ror_epi32( v, 8 )
|
||||
#define mm512_shufll32_8( v ) _mm512_rol_epi32( v, 8 )
|
||||
#define mm512_swap32_16( v ) _mm512_ror_epi32( v, 16 )
|
||||
#define mm512_shuflr32_16 mm512_swap32_16
|
||||
#define mm512_shufll32_16 mm512_swap32_16
|
||||
|
||||
#define mm512_shuflr32_8( v ) _mm512_ror_epi32( v, 8 )
|
||||
#define mm512_shufll32_8( v ) _mm512_rol_epi32( v, 8 )
|
||||
|
||||
#endif // AVX512
|
||||
#endif // SIMD_512_H__
|
||||
|
@@ -55,6 +55,13 @@
|
||||
typedef __int128 int128_t;
|
||||
typedef unsigned __int128 uint128_t;
|
||||
|
||||
typedef union
|
||||
{
|
||||
uint128_t u128;
|
||||
uint64_t u64[2];
|
||||
uint32_t u32[4];
|
||||
} __attribute__ ((aligned (16))) u128_ovly;
|
||||
|
||||
// Extracting the low bits is a trivial cast.
|
||||
// These specialized functions are optimized while providing a
|
||||
// consistent interface.
|
||||
|
345
util.c
345
util.c
@@ -44,28 +44,22 @@
|
||||
#include <libgen.h>
|
||||
#endif
|
||||
|
||||
//#include "miner.h"
|
||||
#include "elist.h"
|
||||
#include "algo-gate-api.h"
|
||||
#include "algo/sha/sha256d.h"
|
||||
|
||||
//extern pthread_mutex_t stats_lock;
|
||||
|
||||
struct data_buffer {
|
||||
void *buf;
|
||||
size_t len;
|
||||
};
|
||||
|
||||
struct upload_buffer {
|
||||
const void *buf;
|
||||
size_t len;
|
||||
size_t pos;
|
||||
};
|
||||
|
||||
struct header_info {
|
||||
char *lp_path;
|
||||
char *reason;
|
||||
char *stratum_url;
|
||||
size_t content_length;
|
||||
};
|
||||
|
||||
struct data_buffer {
|
||||
void *buf;
|
||||
size_t len;
|
||||
size_t allocated;
|
||||
struct header_info *headers;
|
||||
};
|
||||
|
||||
struct tq_ent {
|
||||
@@ -127,7 +121,6 @@ void applog2( int prio, const char *fmt, ... )
|
||||
int len;
|
||||
// struct tm tm;
|
||||
// time_t now = time(NULL);
|
||||
|
||||
// localtime_r(&now, &tm);
|
||||
|
||||
switch ( prio )
|
||||
@@ -395,67 +388,53 @@ static void databuf_free(struct data_buffer *db)
|
||||
static size_t all_data_cb(const void *ptr, size_t size, size_t nmemb,
|
||||
void *user_data)
|
||||
{
|
||||
struct data_buffer *db = (struct data_buffer *) user_data;
|
||||
struct data_buffer *db = user_data;
|
||||
size_t len = size * nmemb;
|
||||
size_t oldlen, newlen;
|
||||
size_t newalloc, reqalloc;
|
||||
void *newmem;
|
||||
static const unsigned char zero = 0;
|
||||
static const size_t max_realloc_increase = 8 * 1024 * 1024;
|
||||
static const size_t initial_alloc = 16 * 1024;
|
||||
|
||||
oldlen = db->len;
|
||||
newlen = oldlen + len;
|
||||
/* minimum required allocation size */
|
||||
reqalloc = db->len + len + 1;
|
||||
|
||||
newmem = realloc(db->buf, newlen + 1);
|
||||
if (!newmem)
|
||||
return 0;
|
||||
if (reqalloc > db->allocated) {
|
||||
if (db->len > 0) {
|
||||
newalloc = db->allocated * 2;
|
||||
} else {
|
||||
if (db->headers->content_length > 0)
|
||||
newalloc = db->headers->content_length + 1;
|
||||
else
|
||||
newalloc = initial_alloc;
|
||||
}
|
||||
|
||||
db->buf = newmem;
|
||||
db->len = newlen;
|
||||
memcpy((uchar*) db->buf + oldlen, ptr, len);
|
||||
memcpy((uchar*) db->buf + newlen, &zero, 1); /* null terminate */
|
||||
if (db->headers->content_length == 0) {
|
||||
/* limit the maximum buffer increase */
|
||||
if (newalloc - db->allocated > max_realloc_increase)
|
||||
newalloc = db->allocated + max_realloc_increase;
|
||||
}
|
||||
|
||||
/* ensure we have a big enough allocation */
|
||||
if (reqalloc > newalloc)
|
||||
newalloc = reqalloc;
|
||||
|
||||
newmem = realloc(db->buf, newalloc);
|
||||
if (!newmem)
|
||||
return 0;
|
||||
|
||||
db->buf = newmem;
|
||||
db->allocated = newalloc;
|
||||
}
|
||||
|
||||
memcpy(db->buf + db->len, ptr, len); /* append new data */
|
||||
memcpy(db->buf + db->len + len, &zero, 1); /* null terminate */
|
||||
|
||||
db->len += len;
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static size_t upload_data_cb(void *ptr, size_t size, size_t nmemb,
|
||||
void *user_data)
|
||||
{
|
||||
struct upload_buffer *ub = (struct upload_buffer *) user_data;
|
||||
size_t len = size * nmemb;
|
||||
|
||||
if (len > ub->len - ub->pos)
|
||||
len = ub->len - ub->pos;
|
||||
|
||||
if (len) {
|
||||
memcpy(ptr, ((uchar*)ub->buf) + ub->pos, len);
|
||||
ub->pos += len;
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
#if LIBCURL_VERSION_NUM >= 0x071200
|
||||
static int seek_data_cb(void *user_data, curl_off_t offset, int origin)
|
||||
{
|
||||
struct upload_buffer *ub = (struct upload_buffer *) user_data;
|
||||
|
||||
switch (origin) {
|
||||
case SEEK_SET:
|
||||
ub->pos = (size_t) offset;
|
||||
break;
|
||||
case SEEK_CUR:
|
||||
ub->pos += (size_t) offset;
|
||||
break;
|
||||
case SEEK_END:
|
||||
ub->pos = ub->len + (size_t) offset;
|
||||
break;
|
||||
default:
|
||||
return 1; /* CURL_SEEKFUNC_FAIL */
|
||||
}
|
||||
|
||||
return 0; /* CURL_SEEKFUNC_OK */
|
||||
}
|
||||
#endif
|
||||
|
||||
static size_t resp_hdr_cb(void *ptr, size_t size, size_t nmemb, void *user_data)
|
||||
{
|
||||
struct header_info *hi = (struct header_info *) user_data;
|
||||
@@ -505,6 +484,9 @@ static size_t resp_hdr_cb(void *ptr, size_t size, size_t nmemb, void *user_data)
|
||||
val = NULL;
|
||||
}
|
||||
|
||||
if (!strcasecmp("Content-Length", key))
|
||||
hi->content_length = strtoul(val, NULL, 10);
|
||||
|
||||
out:
|
||||
free(key);
|
||||
free(val);
|
||||
@@ -564,48 +546,38 @@ json_t *json_rpc_call(CURL *curl, const char *url,
|
||||
int rc;
|
||||
long http_rc;
|
||||
struct data_buffer all_data = {0};
|
||||
struct upload_buffer upload_data;
|
||||
char *json_buf;
|
||||
json_error_t err;
|
||||
struct curl_slist *headers = NULL;
|
||||
char len_hdr[64];
|
||||
char curl_err_str[CURL_ERROR_SIZE] = { 0 };
|
||||
long timeout = (flags & JSON_RPC_LONGPOLL) ? opt_timeout : 30;
|
||||
struct header_info hi = {0};
|
||||
|
||||
all_data.headers = &hi;
|
||||
/* it is assumed that 'curl' is freshly [re]initialized at this pt */
|
||||
|
||||
if (opt_protocol)
|
||||
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
|
||||
if (opt_protocol) curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||
if (opt_cert)
|
||||
curl_easy_setopt(curl, CURLOPT_CAINFO, opt_cert);
|
||||
//
|
||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, false);
|
||||
|
||||
if (opt_cert) curl_easy_setopt(curl, CURLOPT_CAINFO, opt_cert);
|
||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, false);
|
||||
curl_easy_setopt(curl, CURLOPT_ENCODING, "");
|
||||
curl_easy_setopt(curl, CURLOPT_FAILONERROR, 0);
|
||||
curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, all_data_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &all_data);
|
||||
curl_easy_setopt(curl, CURLOPT_READFUNCTION, upload_data_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_READDATA, &upload_data);
|
||||
#if LIBCURL_VERSION_NUM >= 0x071200
|
||||
curl_easy_setopt(curl, CURLOPT_SEEKFUNCTION, &seek_data_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_SEEKDATA, &upload_data);
|
||||
#endif
|
||||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_err_str);
|
||||
if (opt_redirect)
|
||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_err_str);
|
||||
if (opt_redirect) curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
|
||||
curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, resp_hdr_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_HEADERDATA, &hi);
|
||||
if (opt_proxy) {
|
||||
if (opt_proxy)
|
||||
{
|
||||
curl_easy_setopt(curl, CURLOPT_PROXY, opt_proxy);
|
||||
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, opt_proxy_type);
|
||||
}
|
||||
if (userpass) {
|
||||
if (userpass)
|
||||
{
|
||||
curl_easy_setopt(curl, CURLOPT_USERPWD, userpass);
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
|
||||
}
|
||||
@@ -613,23 +585,16 @@ json_t *json_rpc_call(CURL *curl, const char *url,
|
||||
if (flags & JSON_RPC_LONGPOLL)
|
||||
curl_easy_setopt(curl, CURLOPT_SOCKOPTFUNCTION, sockopt_keepalive_cb);
|
||||
#endif
|
||||
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, rpc_req);
|
||||
|
||||
if (opt_protocol)
|
||||
applog(LOG_DEBUG, "JSON protocol request:\n%s\n", rpc_req);
|
||||
|
||||
upload_data.buf = rpc_req;
|
||||
upload_data.len = strlen(rpc_req);
|
||||
upload_data.pos = 0;
|
||||
sprintf(len_hdr, "Content-Length: %lu",
|
||||
(unsigned long) upload_data.len);
|
||||
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
headers = curl_slist_append(headers, len_hdr);
|
||||
headers = curl_slist_append(headers, "User-Agent: " USER_AGENT);
|
||||
headers = curl_slist_append(headers, "X-Mining-Extensions: longpoll reject-reason");
|
||||
//headers = curl_slist_append(headers, "Accept:"); /* disable Accept hdr*/
|
||||
//headers = curl_slist_append(headers, "Expect:"); /* disable Expect hdr*/
|
||||
//headers = curl_slist_append(headers, "Accept:"); // disable Accept hdr
|
||||
//headers = curl_slist_append(headers, "Expect:"); // disable Expect hdr
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
@@ -786,18 +751,26 @@ err_out:
|
||||
return cfg;
|
||||
}
|
||||
|
||||
// Segwit BEGIN
|
||||
void memrev(unsigned char *p, size_t len)
|
||||
{
|
||||
unsigned char c, *q;
|
||||
for (q = p + len - 1; p < q; p++, q--) {
|
||||
c = *p;
|
||||
*p = *q;
|
||||
*q = c;
|
||||
if ( len == 32 )
|
||||
{
|
||||
__m128i *pv = (__m128i*)p;
|
||||
__m128i t = mm128_bswap_128( pv[0] );
|
||||
pv[0] = mm128_bswap_128( pv[1] );
|
||||
pv[1] = t;
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned char c, *q;
|
||||
for (q = p + len - 1; p < q; p++, q--)
|
||||
{
|
||||
c = *p;
|
||||
*p = *q;
|
||||
*q = c;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Segwit END
|
||||
|
||||
|
||||
void cbin2hex(char *out, const char *in, size_t len)
|
||||
{
|
||||
@@ -832,32 +805,42 @@ char *bebin2hex(const unsigned char *p, size_t len)
|
||||
return s;
|
||||
}
|
||||
|
||||
bool hex2bin(unsigned char *p, const char *hexstr, size_t len)
|
||||
bool hex2bin( unsigned char *p, const char *hexstr, const size_t len )
|
||||
{
|
||||
char hex_byte[3];
|
||||
char *ep;
|
||||
if( hexstr == NULL ) return false;
|
||||
|
||||
hex_byte[2] = '\0';
|
||||
|
||||
while (*hexstr && len) {
|
||||
if (!hexstr[1]) {
|
||||
applog(LOG_ERR, "hex2bin str truncated");
|
||||
return false;
|
||||
}
|
||||
hex_byte[0] = hexstr[0];
|
||||
hex_byte[1] = hexstr[1];
|
||||
*p = (unsigned char) strtol(hex_byte, &ep, 16);
|
||||
if (*ep) {
|
||||
applog(LOG_ERR, "hex2bin failed on '%s'", hex_byte);
|
||||
return false;
|
||||
}
|
||||
p++;
|
||||
hexstr += 2;
|
||||
len--;
|
||||
size_t hexstr_len = strlen( hexstr );
|
||||
if( ( hexstr_len % 2 ) != 0 )
|
||||
{
|
||||
applog( LOG_ERR, "hex2bin string truncated" );
|
||||
return false;
|
||||
}
|
||||
size_t bin_len = hexstr_len / 2;
|
||||
if ( bin_len > len )
|
||||
{
|
||||
applog( LOG_ERR, "hex2bin buffer too small" );
|
||||
return false;
|
||||
}
|
||||
|
||||
return(!len) ? true : false;
|
||||
/* return (len == 0 && *hexstr == 0) ? true : false; */
|
||||
memset( p, 0, len );
|
||||
size_t i = 0;
|
||||
while ( i < hexstr_len )
|
||||
{
|
||||
char c = hexstr[i];
|
||||
unsigned char nibble;
|
||||
if ( c >= '0' && c <= '9' ) nibble = (c - '0');
|
||||
else if ( c >= 'A' && c <= 'F' ) nibble = ( 10 + (c - 'A') );
|
||||
else if ( c >= 'a' && c <= 'f' ) nibble = ( 10 + (c - 'a') );
|
||||
else
|
||||
{
|
||||
applog( LOG_ERR, "hex2bin invalid hex" );
|
||||
return false;
|
||||
}
|
||||
p[(i / 2)] |= (nibble << ( (1 - (i % 2) ) * 4) );
|
||||
i++;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int varint_encode(unsigned char *p, uint64_t n)
|
||||
@@ -1339,6 +1322,43 @@ inline bool valid_hash( const void *hash, const void *target )
|
||||
|
||||
#endif
|
||||
|
||||
inline double nbits_to_diff( uint32_t nbits )
|
||||
{
|
||||
long double diff;
|
||||
uint32_t shift = nbits & 0xff;
|
||||
uint32_t bits = bswap_32( nbits ) & 0x00ffffff;
|
||||
int shift_off = (int)shift - 29;
|
||||
|
||||
// diff = ( (2**16 -1) / ( 256**shift_off * bits )
|
||||
// With uint128 byte shift is good for 16 <= shift <= 41. As unlikely
|
||||
// as this may seem necessary, check just in case.
|
||||
|
||||
if ( shift_off >= -13 && shift_off <= 12 )
|
||||
{ // fast
|
||||
if ( shift_off == 0 )
|
||||
diff = (long double)0xffff / (long double)bits;
|
||||
else if ( shift_off < 0 ) // shift < 29
|
||||
diff = (long double)( (uint128_t)0xffff << ( (-shift_off) *8 ) )
|
||||
/ (long double)bits;
|
||||
else // ( shift_off > 0 ) // shift > 29
|
||||
diff = (long double)0xffff
|
||||
/ (long double)( (uint128_t)bits << ( shift_off*8 ) );
|
||||
}
|
||||
else
|
||||
{ // slow
|
||||
int m;
|
||||
diff = 0.;
|
||||
for ( m = shift; m < 29; m++ ) diff *= 256.0;
|
||||
for ( m = 29; m < shift; m++ ) diff /= 256.0;
|
||||
}
|
||||
|
||||
if ( opt_debug )
|
||||
applog( LOG_INFO, "nbits %08x: shift %u(%d), bits %06x, diff %8g",
|
||||
nbits, shift, shift_off, bits, (double)diff );
|
||||
|
||||
return (double)diff;
|
||||
}
|
||||
|
||||
#ifdef WIN32
|
||||
#define socket_blocks() (WSAGetLastError() == WSAEWOULDBLOCK)
|
||||
#else
|
||||
@@ -1507,7 +1527,8 @@ out:
|
||||
return sret;
|
||||
}
|
||||
|
||||
#if LIBCURL_VERSION_NUM >= 0x071101
|
||||
#if LIBCURL_VERSION_NUM >= 0x071101 && LIBCURL_VERSION_NUM < 0x072d00
|
||||
//#if LIBCURL_VERSION_NUM >= 0x071101
|
||||
static curl_socket_t opensocket_grab_cb(void *clientp, curlsocktype purpose,
|
||||
struct curl_sockaddr *addr)
|
||||
{
|
||||
@@ -1575,7 +1596,8 @@ bool stratum_connect(struct stratum_ctx *sctx, const char *url)
|
||||
#if LIBCURL_VERSION_NUM >= 0x070f06
|
||||
curl_easy_setopt(curl, CURLOPT_SOCKOPTFUNCTION, sockopt_keepalive_cb);
|
||||
#endif
|
||||
#if LIBCURL_VERSION_NUM >= 0x071101
|
||||
#if LIBCURL_VERSION_NUM >= 0x071101 && LIBCURL_VERSION_NUM < 0x072d00
|
||||
//#if LIBCURL_VERSION_NUM >= 0x071101
|
||||
curl_easy_setopt(curl, CURLOPT_OPENSOCKETFUNCTION, opensocket_grab_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_OPENSOCKETDATA, &sctx->sock);
|
||||
#endif
|
||||
@@ -1589,7 +1611,10 @@ bool stratum_connect(struct stratum_ctx *sctx, const char *url)
|
||||
return false;
|
||||
}
|
||||
|
||||
#if LIBCURL_VERSION_NUM < 0x071101
|
||||
#if LIBCURL_VERSION_NUM >= 0x072d00
|
||||
curl_easy_getinfo(curl, CURLINFO_ACTIVESOCKET, &sctx->sock);
|
||||
#elif LIBCURL_VERSION_NUM < 0x071101
|
||||
//#if LIBCURL_VERSION_NUM < 0x071101
|
||||
/* CURLINFO_LASTSOCKET is broken on Win64; only use it as a last resort */
|
||||
curl_easy_getinfo(curl, CURLINFO_LASTSOCKET, (long *)&sctx->sock);
|
||||
#endif
|
||||
@@ -1885,7 +1910,8 @@ static uint32_t getblocheight(struct stratum_ctx *sctx)
|
||||
|
||||
// find 0xffff tag
|
||||
p = (uint8_t*) sctx->job.coinbase + 32;
|
||||
m = p + 128;
|
||||
m = p + sctx->job.coinbase_size - 32 - 2;
|
||||
// m = p + 128;
|
||||
while (*p != 0xff && p < m) p++;
|
||||
while (*p == 0xff && p < m) p++;
|
||||
if (*(p-1) == 0xff && *(p-2) == 0xff) {
|
||||
@@ -1992,23 +2018,41 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
|
||||
}
|
||||
}
|
||||
|
||||
if ( merkle_count )
|
||||
merkle = (uchar**) malloc( merkle_count * sizeof(char *) );
|
||||
for ( i = 0; i < merkle_count; i++ )
|
||||
{
|
||||
const char *s = json_string_value( json_array_get( merkle_arr, i ) );
|
||||
if ( !s || strlen(s) != 64 )
|
||||
{
|
||||
while ( i-- ) free( merkle[i] );
|
||||
free( merkle );
|
||||
applog( LOG_ERR, "Stratum notify: invalid Merkle branch" );
|
||||
goto out;
|
||||
}
|
||||
merkle[i] = (uchar*) malloc( 32 );
|
||||
hex2bin( merkle[i], s, 32 );
|
||||
}
|
||||
pthread_mutex_lock( &sctx->work_lock );
|
||||
|
||||
pthread_mutex_lock( &sctx->work_lock );
|
||||
if ( merkle_count )
|
||||
{
|
||||
if ( merkle_count > sctx->job.merkle_buf_size )
|
||||
{
|
||||
for ( i = 0; i < sctx->job.merkle_count; i++ )
|
||||
free( sctx->job.merkle[i] );
|
||||
free( sctx->job.merkle );
|
||||
|
||||
merkle = (uchar**) malloc( merkle_count * sizeof(char *) );
|
||||
for ( i = 0; i < merkle_count; i++ )
|
||||
merkle[i] = (uchar*) malloc( 32 );
|
||||
sctx->job.merkle_buf_size = merkle_count;
|
||||
sctx->job.merkle = merkle;
|
||||
}
|
||||
|
||||
for ( i = 0; i < merkle_count; i++ )
|
||||
{
|
||||
const char *s = json_string_value( json_array_get( merkle_arr, i ) );
|
||||
if ( !s || strlen(s) != 64 )
|
||||
{
|
||||
for ( int j = sctx->job.merkle_buf_size; j > 0; j-- )
|
||||
free( sctx->job.merkle[i] );
|
||||
free( sctx->job.merkle );
|
||||
sctx->job.merkle_count =
|
||||
sctx->job.merkle_buf_size = 0;
|
||||
pthread_mutex_unlock( &sctx->work_lock );
|
||||
applog( LOG_ERR, "Stratum notify: invalid Merkle branch" );
|
||||
goto out;
|
||||
}
|
||||
hex2bin( sctx->job.merkle[i], s, 32 );
|
||||
}
|
||||
}
|
||||
sctx->job.merkle_count = merkle_count;
|
||||
|
||||
coinb1_size = strlen( coinb1 ) / 2;
|
||||
coinb2_size = strlen( coinb2 ) / 2;
|
||||
@@ -2041,18 +2085,9 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
|
||||
}
|
||||
|
||||
sctx->block_height = getblocheight( sctx );
|
||||
|
||||
for ( i = 0; i < sctx->job.merkle_count; i++ )
|
||||
free( sctx->job.merkle[i] );
|
||||
|
||||
free( sctx->job.merkle );
|
||||
sctx->job.merkle = merkle;
|
||||
sctx->job.merkle_count = merkle_count;
|
||||
|
||||
hex2bin( sctx->job.nbits, nbits, 4 );
|
||||
hex2bin( sctx->job.ntime, stime, 4 );
|
||||
sctx->job.clean = clean;
|
||||
|
||||
sctx->job.diff = sctx->next_diff;
|
||||
|
||||
pthread_mutex_unlock( &sctx->work_lock );
|
||||
|
@@ -17,8 +17,9 @@ export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
|
||||
# used by GCC
|
||||
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl"
|
||||
# Support for Windows 7 CPU groups, AES sometimes not included in -march
|
||||
# Disabled due to CPU group incompatibilities between Intel and AMD CPU.
|
||||
export DEFAULT_CFLAGS="-maes -O3 -Wall -D_WIN32_WINNT=0x0601"
|
||||
# CPU groups disabled due to incompatibilities between Intel and AMD CPUs.
|
||||
#export DEFAULT_CFLAGS="-maes -O3 -Wall -D_WIN32_WINNT=0x0601"
|
||||
export DEFAULT_CFLAGS="-maes -O3 -Wall"
|
||||
export DEFAULT_CFLAGS_OLD="-O3 -Wall"
|
||||
|
||||
# make link to local gmp header file.
|
||||
@@ -46,7 +47,7 @@ cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/
|
||||
./clean-all.sh || echo clean
|
||||
rm -f config.status
|
||||
./autogen.sh || echo done
|
||||
CFLAGS="-march=icelake-client $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
||||
CFLAGS="-march=icelake-client $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
|
||||
@@ -54,7 +55,7 @@ mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
|
||||
# AVX512 AES: Intel Core HEDT Slylake-X, Cascadelake
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-march=skylake-avx512 $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
||||
CFLAGS="-march=skylake-avx512 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-avx512.exe
|
||||
@@ -62,7 +63,7 @@ mv cpuminer.exe release/cpuminer-avx512.exe
|
||||
# AVX2 SHA VAES: Intel Alderlake, AMD Zen3
|
||||
make clean || echo done
|
||||
rm -f config.status
|
||||
CFLAGS="-mavx2 -msha -mvaes $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
||||
CFLAGS="-mavx2 -msha -mvaes $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-avx2-sha-vaes.exe
|
||||
@@ -70,7 +71,7 @@ mv cpuminer.exe release/cpuminer-avx2-sha-vaes.exe
|
||||
# AVX2 AES SHA: AMD Zen1
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-march=znver1 $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
||||
CFLAGS="-march=znver1 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-avx2-sha.exe
|
||||
@@ -78,7 +79,7 @@ mv cpuminer.exe release/cpuminer-avx2-sha.exe
|
||||
# AVX2 AES: Intel Core Haswell, Skylake, Kabylake, Coffeelake, Cometlake
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-march=core-avx2 $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
||||
CFLAGS="-march=core-avx2 $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe release/cpuminer-avx2.exe
|
||||
@@ -128,7 +129,7 @@ make clean || echo clean
|
||||
# Native with CPU groups ennabled
|
||||
make clean || echo clean
|
||||
rm -f config.status
|
||||
CFLAGS="-march=native $DEFAULT_CFLAGS" ./configure $CONFIGURE_ARGS
|
||||
CFLAGS="-march=native $DEFAULT_CFLAGS_OLD" ./configure $CONFIGURE_ARGS
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
|
||||
|
Reference in New Issue
Block a user