This commit is contained in:
Jay D Dee
2019-05-31 13:20:12 -04:00
parent 77c5ae80ab
commit 02202ab803
19 changed files with 106 additions and 48 deletions

View File

@@ -38,6 +38,16 @@ supported.
Change Log
----------
v3.9.1.1
Fixed lyra2v3 AVX and below.
Compiling on Windows using Cygwin now works. Simply use "./build.sh"
just like on Linux. The resulting build isn't portable, therefore the
binaries package will continue to use the existing procedure.
The Cygwin procedure will be documented in more detail later and will
include a list of packages that need to be installed.
v3.9.1
Fixed AVX2 version of anime algo.

View File

@@ -11,6 +11,10 @@
#include <sys/endian.h>
#endif
#if defined(__CYGWIN__)
#include <endian.h>
#endif
#include "tmmintrin.h"
#include "smmintrin.h"

View File

@@ -8,6 +8,10 @@
#include <sys/endian.h>
#endif
#if defined(__CYGWIN__)
#include <endian.h>
#endif
#include "tmmintrin.h"
#include "smmintrin.h"
#include "immintrin.h"

View File

@@ -91,7 +91,7 @@ extern "C"{
#pragma warning (disable: 4146)
#endif
/*
static const sph_u64 RC[] = {
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
@@ -106,7 +106,7 @@ static const sph_u64 RC[] = {
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
};
*/
#define kekDECL_STATE \
sph_u64 keca00, keca01, keca02, keca03, keca04; \
sph_u64 keca10, keca11, keca12, keca13, keca14; \
@@ -756,6 +756,20 @@ static const sph_u64 RC[] = {
* tested faster saving space
*/
#define KECCAK_F_1600_ do { \
static const sph_u64 RC[] = { \
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082), \
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000), \
SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001), \
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009), \
SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088), \
SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A), \
SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B), \
SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003), \
SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080), \
SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A), \
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080), \
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008) \
}; \
int j; \
for (j = 0; j < 24; j += 4) { \
KF_ELT( 0, 1, RC[j + 0]); \
@@ -791,7 +805,7 @@ static const sph_u64 RC[] = {
/* load initial constants */
#define KEC_I
static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 };
//static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 };
/*
unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
*/
@@ -799,6 +813,7 @@ static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0
/* load hash for loop */
/*
 * KEC_U copies the 8-byte constant keczword (0x01, six zero bytes, 0x80)
 * to `hash + 64`. `hash` is not a macro parameter: it must be a pointer or
 * array named `hash` in the caller's scope.
 * NOTE(review): the offset is 64 *elements* of hash's type; this is only
 * byte offset 64 if `hash` is a byte pointer -- confirm at the call sites.
 *
 * The table is declared inside the macro, so each expansion site gets its own
 * function-local copy; it is const because it is only ever read.
 *
 * NOTE(review): the expansion already ends in ';' (kept for compatibility
 * with existing call sites), so `if (c) KEC_U; else ...` will not compile.
 */
#define KEC_U \
do { \
    static const unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
    /*memcpy(hashbuf, hash, 64); */ \
    memcpy(hash + 64, keczword, 8); \
} while (0);

View File

@@ -57,7 +57,7 @@ bool init_lyra2rev2_ctx();
/////////////////////////
#if defined(__SSE4_2__)
#if defined(__SSE2__)
#define LYRA2Z_4WAY
#endif
#if defined(__AVX2__)

View File

@@ -91,7 +91,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
#elif defined(__SSE2__)
#elif defined(__SSE4_2__)
// process 2 columns in parallel
// returns void, all args updated
@@ -108,7 +108,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm128_rol1x64_256( s2, s3 ); \
mm128_ror1x64_256( s2, s3 ); \
mm128_swap128_256( s4, s5 ); \
mm128_rol1x64_256( s6, s7 ); \
G_2X64( s0, s2, s4, s6 ); \
@@ -132,7 +132,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
#endif // AVX2
#endif // AVX2 else SSE4_2
// Scalar
//Blake2b's G function

View File

@@ -30,7 +30,7 @@
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#if defined(__SSE4_2__)
#if defined(__SSE2__)
#include <stddef.h>
#include <string.h>
@@ -716,4 +716,4 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
}
#endif // __AVX2__
#endif // __SSE4_2__
#endif // __SSE2__

View File

@@ -44,7 +44,8 @@
#include "sph_types.h"
#include "avxdefs.h"
#if defined(__SSE4_2__)
#if defined(__SSE2__)
//#if defined(__SSE4_2__)
//#define SPH_SIZE_sha256 256

View File

@@ -108,7 +108,9 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
return 0;
}
#elif defined(SHA256T_4WAY)
#endif
#if defined(SHA256T_4WAY)
static __thread sha256_4way_context sha256_ctx4 __attribute__ ((aligned (64)));

View File

@@ -3,15 +3,15 @@
bool register_sha256t_algo( algo_gate_t* gate )
{
#if defined(SHA256T_8WAY)
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT;
gate->scanhash = (void*)&scanhash_sha256t_8way;
gate->hash = (void*)&sha256t_8way_hash;
#elif defined(SHA256T_4WAY)
gate->optimizations = SSE42_OPT | AVX2_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT;
gate->scanhash = (void*)&scanhash_sha256t_4way;
gate->hash = (void*)&sha256t_4way_hash;
#else
gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
gate->scanhash = (void*)&scanhash_sha256t;
gate->hash = (void*)&sha256t_hash;
#endif

View File

@@ -6,7 +6,8 @@
// Override multi way on ryzen, SHA is better.
#if !defined(RYZEN_)
#if defined(__SSE4_2__)
//#if defined(__SSE4_2__)
#if defined(__SSE2__)
#define SHA256T_4WAY
#endif
#if defined(__AVX2__)
@@ -22,7 +23,7 @@ void sha256t_8way_hash( void *output, const void *input );
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
#elif defined (SHA256T_4WAY)
#elif defined(SHA256T_4WAY)
void sha256t_4way_hash( void *output, const void *input );
int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,

View File

@@ -5,8 +5,6 @@
#include <stdio.h>
#include <openssl/sha.h>
#if !defined(SHA256T_4WAY)
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
void sha256t_midstate( const void* input )
@@ -100,4 +98,3 @@ int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
pdata[19] = n;
return 0;
}
#endif

View File

@@ -819,10 +819,7 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
int num_found = 0;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
// uint32_t *noncep = vdata + 73; // 9*8 + 1
const uint32_t Htarg = ptarget[7];
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
uint64_t htmax[] = { 0, 0xF, 0xFF,
@@ -855,18 +852,23 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
nonces[ num_found++ ] = n + lane;
work_set_target_ratio( work, lane_hash );
if ( submit_work( mythr, work ) )
applog( LOG_NOTICE,
"Share %d submitted by thread %d, lane %d.",
accepted_share_count + rejected_share_count + 1,
thr_id, lane );
else
applog( LOG_WARNING, "Failed to submit share." );
}
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )
&& !work_restart[thr_id].restart );
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
break;
}
*hashes_done = n - first_nonce + 1;
return num_found;
return 0;
}
#endif

View File

@@ -242,8 +242,6 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
uint32_t *nonces = work->nonces;
int num_found = 0;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
const uint32_t Htarg = ptarget[7];
@@ -277,18 +275,23 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
nonces[ num_found++ ] = n + lane;
work_set_target_ratio( work, lane_hash );
if ( submit_work( mythr, work ) )
applog( LOG_NOTICE,
"Share %d submitted by thread %d, lane %d.",
accepted_share_count + rejected_share_count + 1,
thr_id, lane );
else
applog( LOG_WARNING, "Failed to submit share." );
}
}
n += 4;
} while ( ( num_found == 0 ) && ( n < max_nonce )
&& !work_restart[thr_id].restart );
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
break;
}
*hashes_done = n - first_nonce + 1;
return num_found;
return 0;
}
#endif

View File

@@ -528,7 +528,7 @@ static volatile uint64_t Smask2var = Smask2;
/* 64-bit without AVX. This relies on out-of-order execution and register
* renaming. It may actually be fastest on CPUs with AVX(2) as well - e.g.,
* it runs great on Haswell. */
#warning "Note: using x86-64 inline assembly for pwxform. That's great."
//#warning "Note: using x86-64 inline assembly for pwxform. That's great."
#undef MAYBE_MEMORY_BARRIER
#define MAYBE_MEMORY_BARRIER \
__asm__("" : : : "memory");

View File

@@ -173,6 +173,7 @@ typedef union _m64_v16 m64_v16;
// Unary negate elements.
// Each macro subtracts v from m64_zero element-wise, using 32, 16 or 8 bit
// elements respectively. m64_zero is defined elsewhere in this header
// (presumably the all-zero 64 bit vector -- confirm).
// The argument is parenthesized before the cast so that a compound
// expression is converted as a whole rather than only its first operand.
#define mm64_negate_32( v ) _mm_sub_pi32( m64_zero, (__m64)(v) )
#define mm64_negate_16( v ) _mm_sub_pi16( m64_zero, (__m64)(v) )
#define mm64_negate_8( v )  _mm_sub_pi8(  m64_zero, (__m64)(v) )
// Rotate bits in packed elements of 64 bit vector
#define mm64_rol_32( a, n ) \
@@ -206,15 +207,32 @@ typedef union _m64_v16 m64_v16;
// Endian byte swap packed elements of a 64 bit vector.
//   mm64_bswap_64: reverse all 8 bytes.
//   mm64_bswap_32: reverse the bytes inside each of the two 32 bit elements.
//   mm64_bswap_16: reverse the bytes inside each of the four 16 bit elements.
// Every macro is a plain expression (no trailing semicolon) so it can be
// nested inside other expressions. The argument may be evaluated more than
// once in the fallback versions, so it must be free of side effects.

#if defined(__SSSE3__)

// A vectorized version of the u64 bswap, use when data already in MMX reg.
// _mm_set_pi8 takes its arguments from the most significant byte down to the
// least significant one, so the index lists below read right to left.

#define mm64_bswap_64( v ) \
   _mm_shuffle_pi8( (__m64)(v), _mm_set_pi8( 0,1,2,3,4,5,6,7 ) )

#define mm64_bswap_32( v ) \
   _mm_shuffle_pi8( (__m64)(v), _mm_set_pi8( 4,5,6,7,  0,1,2,3 ) )

#define mm64_bswap_16( v ) \
   _mm_shuffle_pi8( (__m64)(v), _mm_set_pi8( 6,7,  4,5,  2,3,  0,1 ) )

#else

// No byte shuffle available: operate on the value as a 64 bit integer.

#define mm64_bswap_64( v ) \
   ( (__m64)__builtin_bswap64( (uint64_t)(__m64)(v) ) )

// Swap each 32 bit half separately; the halves are extracted with shifts
// rather than by reinterpreting the vector through a pointer.
#define mm64_bswap_32( v ) \
   _mm_set_pi32( __builtin_bswap32( (uint32_t)( (uint64_t)(__m64)(v) >> 32 ) ), \
                 __builtin_bswap32( (uint32_t)( (uint64_t)(__m64)(v) ) ) )

// Exchange the odd and even bytes of the 64 bit value.
#define mm64_bswap_16( v ) \
   ( (__m64)( ( ( (uint64_t)(__m64)(v) & 0x00FF00FF00FF00FFULL ) << 8 ) | \
              ( ( (uint64_t)(__m64)(v) >> 8 ) & 0x00FF00FF00FF00FFULL ) ) )

#endif
// Invert vector: {3,2,1,0} -> {0,1,2,3}
// Invert_64 is the same as bswap64
// Invert_32 is the same as swap32
#define mm64_invert_16( v ) _mm_shuffle_pi16( (__m64)v, 0x1b )
#if defined(__SSSE3__)
@@ -1899,7 +1917,7 @@ do { \
#endif // AVX512F
#if 0
#if 1
//////////////////////////////////////////////////
//
// Compile test.
@@ -1919,6 +1937,7 @@ static inline __m64 mmx_compile_test( __m64 a )
m = _mm_shuffle_pi8( m, (__m64)0x0102030405060708 );
i = (uint64_t) mm64_ror_32( (__m64)i, 7 );
casti_m64( n, 2 ) = m;
m = (__m64)__builtin_bswap64( (uint64_t)m );
return a;
}

22
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.1.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.1.1.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.9.1'
PACKAGE_STRING='cpuminer-opt 3.9.1'
PACKAGE_VERSION='3.9.1.1'
PACKAGE_STRING='cpuminer-opt 3.9.1.1'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.9.1 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.9.1.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.9.1:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.9.1.1:";;
esac
cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.9.1
cpuminer-opt configure 3.9.1.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.9.1, which was
It was created by cpuminer-opt $as_me 3.9.1.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.9.1'
VERSION='3.9.1.1'
cat >>confdefs.h <<_ACEOF
@@ -5884,7 +5884,7 @@ fi
# GC2 for GNU static
if test "x$OS" = "xWindows_NT" ; then
if test "x$have_win32" = "xtrue" ; then
# MinGW
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5
$as_echo_n "checking for pthread_create in -lpthread... " >&6; }
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.9.1, which was
This file was extended by cpuminer-opt $as_me 3.9.1.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.9.1
cpuminer-opt config.status 3.9.1.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.9.1])
AC_INIT([cpuminer-opt], [3.9.1.1])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
@@ -106,7 +106,7 @@ fi
AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
# GC2 for GNU static
if test "x$OS" = "xWindows_NT" ; then
if test "x$have_win32" = "xtrue" ; then
# MinGW
AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS="-lpthreadGC2",[])
else

View File

@@ -19,7 +19,7 @@ export CONFIGURE_ARGS="--with-curl=$LOCAL_LIB/curl --with-crypto=$LOCAL_LIB/open
ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
# edit configure to fix pthread lib name for Windows.
sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
#sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
# make release directory and copy selected DLLs.
mkdir release