mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
4fa8fcea8b |
@@ -65,6 +65,12 @@ If not what makes it happen or not happen?
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.15.1
|
||||
|
||||
Fix compile on AMD Zen3 CPUs with VAES.
|
||||
Force new work immediately after solving a block solo.
|
||||
|
||||
|
||||
v3.15.0
|
||||
|
||||
Fugue optimized with AES, improves many sha3 algos.
|
||||
|
@@ -34,13 +34,11 @@ MYALIGN const unsigned long long _supermix4c[] = {0x0706050403020000, 0x03020000
|
||||
MYALIGN const unsigned long long _supermix7a[] = {0x010c0b060d080702, 0x0904030e03000104};
|
||||
MYALIGN const unsigned long long _supermix7b[] = {0x8080808080808080, 0x0504070605040f06};
|
||||
MYALIGN const unsigned long long _k_n[] = {0x4E4E4E4E4E4E4E4E, 0x1B1B1B1B0E0E0E0E};
|
||||
MYALIGN const unsigned int _maskd3n[] = {0xffffffff, 0xffffffff, 0xffffffff, 0x00000000};
|
||||
MYALIGN const unsigned char _shift_one_mask[] = {7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14, 3, 0, 1, 2};
|
||||
MYALIGN const unsigned char _shift_four_mask[] = {13, 14, 15, 12, 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8};
|
||||
MYALIGN const unsigned char _shift_seven_mask[] = {10, 11, 8, 9, 14, 15, 12, 13, 2, 3, 0, 1, 6, 7, 4, 5};
|
||||
MYALIGN const unsigned char _aes_shift_rows[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11};
|
||||
MYALIGN const unsigned int _inv_shift_rows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c};
|
||||
MYALIGN const unsigned int _zero[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int _mul2mask[] = {0x1b1b0000, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int _mul4mask[] = {0x2d361b00, 0x00000000, 0x00000000, 0x00000000};
|
||||
MYALIGN const unsigned int _lsbmask2[] = {0x03030303, 0x03030303, 0x03030303, 0x03030303};
|
||||
@@ -61,7 +59,7 @@ MYALIGN const unsigned int _IV512[] = {
|
||||
|
||||
#define UNPACK_S0(s0, s1, t1)\
|
||||
s1 = _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(s1), _mm_castsi128_ps(s0), 0xc0));\
|
||||
s0 = _mm_and_si128(s0, M128(_maskd3n))
|
||||
s0 = mm128_mask_32( s0, 8 )
|
||||
|
||||
#define CMIX(s1, s2, r1, r2, t1, t2)\
|
||||
t1 = s1;\
|
||||
@@ -78,7 +76,7 @@ MYALIGN const unsigned int _IV512[] = {
|
||||
#define UNPACK_S0(s0, s1, t1)\
|
||||
t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 3, 3));\
|
||||
s1 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s1), _mm_castsi128_ps(t1)));\
|
||||
s0 = _mm_and_si128(s0, M128(_maskd3n))
|
||||
s0 = mm128_mask_32( s0, 8 )
|
||||
|
||||
#define CMIX(s1, s2, r1, r2, t1, t2)\
|
||||
t1 = _mm_shuffle_epi32(s1, 0xf9);\
|
||||
@@ -138,7 +136,7 @@ MYALIGN const unsigned int _IV512[] = {
|
||||
|
||||
#define SUBSTITUTE(r0, _t1, _t2, _t3, _t0)\
|
||||
_t2 = _mm_shuffle_epi8(r0, M128(_inv_shift_rows));\
|
||||
_t2 = _mm_aesenclast_si128(_t2, M128(_zero))
|
||||
_t2 = _mm_aesenclast_si128( _t2, m128_zero )
|
||||
|
||||
#define SUPERMIX(t0, t1, t2, t3, t4)\
|
||||
PRESUPERMIX(t0, t1, t2, t3, t4);\
|
||||
@@ -181,14 +179,14 @@ MYALIGN const unsigned int _IV512[] = {
|
||||
SUPERMIX(_t2, _t3, _t0, _t1, r1c);\
|
||||
_t0 = _mm_shuffle_epi32(r1c, 0x39);\
|
||||
r2c = _mm_xor_si128(r2c, _t0);\
|
||||
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
|
||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
||||
r2d = _mm_xor_si128(r2d, _t0);\
|
||||
UNPACK_S0(r1c, r1a, _t3);\
|
||||
SUBSTITUTE(r2c, _t1, _t2, _t3, _t0);\
|
||||
SUPERMIX(_t2, _t3, _t0, _t1, r2c);\
|
||||
_t0 = _mm_shuffle_epi32(r2c, 0x39);\
|
||||
r3c = _mm_xor_si128(r3c, _t0);\
|
||||
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
|
||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
||||
r3d = _mm_xor_si128(r3d, _t0);\
|
||||
UNPACK_S0(r2c, r2a, _t3);\
|
||||
SUBSTITUTE(r3c, _t1, _t2, _t3, _t0);\
|
||||
@@ -203,21 +201,21 @@ MYALIGN const unsigned int _IV512[] = {
|
||||
SUPERMIX(_t2, _t3, _t0, _t1, r1c);\
|
||||
_t0 = _mm_shuffle_epi32(r1c, 0x39);\
|
||||
r2c = _mm_xor_si128(r2c, _t0);\
|
||||
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
|
||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
||||
r2d = _mm_xor_si128(r2d, _t0);\
|
||||
UNPACK_S0(r1c, r1a, _t3);\
|
||||
SUBSTITUTE(r2c, _t1, _t2, _t3, _t0);\
|
||||
SUPERMIX(_t2, _t3, _t0, _t1, r2c);\
|
||||
_t0 = _mm_shuffle_epi32(r2c, 0x39);\
|
||||
r3c = _mm_xor_si128(r3c, _t0);\
|
||||
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
|
||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
||||
r3d = _mm_xor_si128(r3d, _t0);\
|
||||
UNPACK_S0(r2c, r2a, _t3);\
|
||||
SUBSTITUTE(r3c, _t1, _t2, _t3, _t0);\
|
||||
SUPERMIX(_t2, _t3, _t0, _t1, r3c);\
|
||||
_t0 = _mm_shuffle_epi32(r3c, 0x39);\
|
||||
r4c = _mm_xor_si128(r4c, _t0);\
|
||||
_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
|
||||
_t0 = mm128_mask_32( _t0, 8 ); \
|
||||
r4d = _mm_xor_si128(r4d, _t0);\
|
||||
UNPACK_S0(r3c, r3a, _t3);\
|
||||
SUBSTITUTE(r4c, _t1, _t2, _t3, _t0);\
|
||||
@@ -462,7 +460,7 @@ HashReturn fugue512_Init(hashState_fugue *ctx, int nHashSize)
|
||||
ctx->uBlockLength = 4;
|
||||
|
||||
for(i = 0; i < 6; i++)
|
||||
ctx->state[i] = _mm_setzero_si128();
|
||||
ctx->state[i] = m128_zero;
|
||||
|
||||
ctx->state[6] = _mm_load_si128((__m128i*)_IV512 + 0);
|
||||
ctx->state[7] = _mm_load_si128((__m128i*)_IV512 + 1);
|
||||
|
@@ -17,7 +17,7 @@
|
||||
#if defined(__AES__)
|
||||
|
||||
#include "algo/sha/sha3_common.h"
|
||||
#include <x86intrin.h>
|
||||
#include "simd-utils.h"
|
||||
|
||||
|
||||
typedef struct
|
||||
|
@@ -7,13 +7,13 @@
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
|
||||
#if !defined(GROESTL256_INTR_4WAY_H__)
|
||||
#define GROESTL256_INTR_4WAY_H__ 1
|
||||
|
||||
#include "groestl256-hash-4way.h"
|
||||
|
||||
#if defined(__VAES__)
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
static const __m128i round_const_l0[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
{ 0x7060504030201000, 0xffffffffffffffff },
|
||||
|
@@ -7,13 +7,12 @@
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
|
||||
#if !defined(GROESTL512_INTR_4WAY_H__)
|
||||
#define GROESTL512_INTR_4WAY_H__ 1
|
||||
|
||||
#include "groestl512-hash-4way.h"
|
||||
|
||||
#if defined(__VAES__)
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
static const __m128i round_const_p[] __attribute__ ((aligned (64))) =
|
||||
{
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/cubehash/cubehash_sse2.h"
|
||||
#include "lyra2.h"
|
||||
#if defined(__VAES__)
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#include "algo/echo/echo-hash-4way.h"
|
||||
#elif defined(__AES__)
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
|
@@ -42,7 +42,6 @@ void init_phi1612_ctx()
|
||||
sph_skein512_init( &phi_ctx.skein );
|
||||
sph_jh512_init( &phi_ctx.jh );
|
||||
cubehashInit( &phi_ctx.cube, 512, 16, 32 );
|
||||
sph_fugue512_init( &phi_ctx.fugue );
|
||||
sph_gost512_init( &phi_ctx.gost );
|
||||
#ifdef __AES__
|
||||
init_echo( &phi_ctx.echo, 512 );
|
||||
|
@@ -7,6 +7,7 @@
|
||||
#include <stdio.h>
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/bmw/sph_bmw.h"
|
||||
//#include "algo/jh/jh-hash-sse2.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
@@ -49,6 +50,7 @@ struct TortureGarden
|
||||
sph_blake512_context blake;
|
||||
sph_bmw512_context bmw;
|
||||
sph_skein512_context skein;
|
||||
// jh512_sse2_hashState jh;
|
||||
sph_jh512_context jh;
|
||||
sph_keccak512_context keccak;
|
||||
hashState_luffa luffa;
|
||||
@@ -125,6 +127,7 @@ static void get_hash( void *output, const void *input, TortureGarden *garden,
|
||||
SHA512_Final( (unsigned char*)hash, &garden->sha512 );
|
||||
break;
|
||||
case 8:
|
||||
// jh512_sse2_full( &garden->jh, hash, input, 64 );
|
||||
sph_jh512_init(&garden->jh);
|
||||
sph_jh512(&garden->jh, input, 64);
|
||||
sph_jh512_close(&garden->jh, hash);
|
||||
|
@@ -4,7 +4,7 @@
|
||||
# during develpment. However the information contained may provide compilation
|
||||
# tips to users.
|
||||
|
||||
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen > /dev/null
|
||||
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-aes-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 > /dev/null
|
||||
|
||||
make distclean || echo clean
|
||||
rm -f config.status
|
||||
@@ -87,6 +87,16 @@ mv cpuminer.exe cpuminer-zen.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-zen
|
||||
|
||||
make clean || echo done
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=znver2 -mvaes -Wall -fno-common" ./configure --with-curl
|
||||
# CFLAGS="-O3 -march=znver3 -Wall -fno-common" ./configure --with-curl
|
||||
make -j 8
|
||||
strip -s cpuminer.exe
|
||||
mv cpuminer.exe cpuminer-zen3.exe
|
||||
strip -s cpuminer
|
||||
mv cpuminer cpuminer-zen3
|
||||
|
||||
make clean || echo done
|
||||
rm -f config.status
|
||||
CFLAGS="-O3 -march=native -Wall -fno-common" ./configure --with-curl
|
||||
|
@@ -1,10 +1,9 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# imake clean and rm all the targetted executables.
|
||||
# tips to users.
|
||||
# make clean and rm all the targetted executables.
|
||||
|
||||
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen cpuminer-sse42 cpuminer-ssse3 > /dev/null
|
||||
rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen cpuminer-sse42 cpuminer-ssse3 cpuminer-zen3 > /dev/null
|
||||
|
||||
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe cpuminer-sse42 cpuminer-ssse3 > /dev/null
|
||||
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-zen3.exe > /dev/null
|
||||
|
||||
make distclean > /dev/null
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.15.0.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.15.1.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.15.0'
|
||||
PACKAGE_STRING='cpuminer-opt 3.15.0'
|
||||
PACKAGE_VERSION='3.15.1'
|
||||
PACKAGE_STRING='cpuminer-opt 3.15.1'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.15.0 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.15.1 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.15.0:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.15.1:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.15.0
|
||||
cpuminer-opt configure 3.15.1
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.15.0, which was
|
||||
It was created by cpuminer-opt $as_me 3.15.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.15.0'
|
||||
VERSION='3.15.1'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.15.0, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.15.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.15.0
|
||||
cpuminer-opt config.status 3.15.1
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.15.0])
|
||||
AC_INIT([cpuminer-opt], [3.15.1])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
19
cpu-miner.c
19
cpu-miner.c
@@ -1829,10 +1829,11 @@ bool submit_solution( struct work *work, const void *hash,
|
||||
update_submit_stats( work, hash );
|
||||
|
||||
if unlikely( !have_stratum && !have_longpoll )
|
||||
{ // block solved, force getwork
|
||||
{ // solo, block solved, force getwork
|
||||
pthread_rwlock_wrlock( &g_work_lock );
|
||||
g_work_time = 0;
|
||||
pthread_rwlock_unlock( &g_work_lock );
|
||||
restart_threads();
|
||||
}
|
||||
|
||||
if ( !opt_quiet )
|
||||
@@ -1868,25 +1869,25 @@ static bool wanna_mine(int thr_id)
|
||||
bool state = true;
|
||||
|
||||
if (opt_max_temp > 0.0)
|
||||
{
|
||||
{
|
||||
float temp = cpu_temp(0);
|
||||
if (temp > opt_max_temp)
|
||||
{
|
||||
{
|
||||
if (!thr_id && !conditional_state[thr_id] && !opt_quiet)
|
||||
applog(LOG_INFO, "temperature too high (%.0fC), waiting...", temp);
|
||||
state = false;
|
||||
}
|
||||
}
|
||||
if (opt_max_diff > 0.0 && net_diff > opt_max_diff)
|
||||
{
|
||||
{
|
||||
if (!thr_id && !conditional_state[thr_id] && !opt_quiet)
|
||||
applog(LOG_INFO, "network diff too high, waiting...");
|
||||
state = false;
|
||||
}
|
||||
if (opt_max_rate > 0.0 && net_hashrate > opt_max_rate)
|
||||
{
|
||||
{
|
||||
if (!thr_id && !conditional_state[thr_id] && !opt_quiet)
|
||||
{
|
||||
{
|
||||
char rate[32];
|
||||
format_hashrate(opt_max_rate, rate);
|
||||
applog(LOG_INFO, "network hashrate too high, waiting %s...", rate);
|
||||
@@ -1903,7 +1904,7 @@ static bool wanna_mine(int thr_id)
|
||||
// default
|
||||
void sha256d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
|
||||
{
|
||||
sha256d(merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size);
|
||||
sha256d( merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size );
|
||||
for ( int i = 0; i < sctx->job.merkle_count; i++ )
|
||||
{
|
||||
memcpy( merkle_root + 32, sctx->job.merkle[i], 32 );
|
||||
@@ -2038,7 +2039,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
|
||||
{
|
||||
unsigned char *xnonce2str = abin2hex( g_work->xnonce2,
|
||||
g_work->xnonce2_len );
|
||||
applog( LOG_INFO, "Extranonce %s, Block %d, Net Diff %.5g",
|
||||
applog( LOG_INFO, "Extranonce2 %s, Block %d, Net Diff %.5g",
|
||||
xnonce2str, sctx->block_height, net_diff );
|
||||
free( xnonce2str );
|
||||
}
|
||||
@@ -3509,7 +3510,7 @@ bool check_cpu_capability ()
|
||||
use_avx2 = cpu_has_avx2 && sw_has_avx2 && algo_has_avx2;
|
||||
use_avx512 = cpu_has_avx512 && sw_has_avx512 && algo_has_avx512;
|
||||
use_sha = cpu_has_sha && sw_has_sha && algo_has_sha;
|
||||
use_vaes = cpu_has_vaes && sw_has_vaes && algo_has_vaes;
|
||||
use_vaes = cpu_has_vaes && sw_has_vaes && algo_has_vaes && use_avx512;
|
||||
use_none = !( use_sse2 || use_aes || use_sse42 || use_avx512 || use_avx2 ||
|
||||
use_sha || use_vaes );
|
||||
|
||||
|
@@ -135,11 +135,17 @@ static inline __m128i mm128_neg1_fn()
|
||||
// Bitwise not (~v)
|
||||
#define mm128_not( v ) _mm_xor_si128( (v), m128_neg1 )
|
||||
|
||||
// Unary negation of elements
|
||||
// Unary negation of elements (-v)
|
||||
#define mm128_negate_64( v ) _mm_sub_epi64( m128_zero, v )
|
||||
#define mm128_negate_32( v ) _mm_sub_epi32( m128_zero, v )
|
||||
#define mm128_negate_16( v ) _mm_sub_epi16( m128_zero, v )
|
||||
|
||||
// Clear (zero) 32 bit elements based on bits set in 4 bit mask.
|
||||
// Fast, avoids using vector mask, but only available for 128 bit vectors.
|
||||
#define mm128_mask_32( a, mask ) \
|
||||
_mm_castps_si128( _mm_insert_ps( _mm_castsi128_ps( a ), \
|
||||
_mm_castsi128_ps( a ), mask ) )
|
||||
|
||||
// Add 4 values, fewer dependencies than sequential addition.
|
||||
#define mm128_add4_64( a, b, c, d ) \
|
||||
_mm_add_epi64( _mm_add_epi64( a, b ), _mm_add_epi64( c, d ) )
|
||||
@@ -269,11 +275,8 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
||||
// Rotate vector elements accross all lanes
|
||||
|
||||
#define mm128_swap_64( v ) _mm_shuffle_epi32( v, 0x4e )
|
||||
|
||||
#define mm128_ror_1x32( v ) _mm_shuffle_epi32( v, 0x39 )
|
||||
#define mm128_rol_1x32( v ) _mm_shuffle_epi32( v, 0x93 )
|
||||
|
||||
|
||||
//#define mm128_swap_64( v ) _mm_alignr_epi8( v, v, 8 )
|
||||
//#define mm128_ror_1x32( v ) _mm_alignr_epi8( v, v, 4 )
|
||||
//#define mm128_rol_1x32( v ) _mm_alignr_epi8( v, v, 12 )
|
||||
@@ -282,53 +285,11 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
||||
#define mm128_ror_1x8( v ) _mm_alignr_epi8( v, v, 1 )
|
||||
#define mm128_rol_1x8( v ) _mm_alignr_epi8( v, v, 15 )
|
||||
|
||||
// Rotate by c bytes
|
||||
#define mm128_ror_x8( v, c ) _mm_alignr_epi8( v, c )
|
||||
#define mm128_rol_x8( v, c ) _mm_alignr_epi8( v, 16-(c) )
|
||||
|
||||
|
||||
/*
|
||||
// Rotate 16 byte (128 bit) vector by c bytes.
|
||||
// Less efficient using shift but more versatile. Use only for odd number
|
||||
// byte rotations. Use shuffle above whenever possible.
|
||||
#define mm128_ror_x8( v, c ) \
|
||||
_mm_or_si128( _mm_srli_si128( v, c ), _mm_slli_si128( v, 16-(c) ) )
|
||||
|
||||
#define mm128_rol_x8( v, c ) \
|
||||
_mm_or_si128( _mm_slli_si128( v, c ), _mm_srli_si128( v, 16-(c) ) )
|
||||
|
||||
#if defined (__SSE3__)
|
||||
// no SSE2 implementation, no current users
|
||||
|
||||
#define mm128_ror_1x16( v ) \
|
||||
_mm_shuffle_epi8( v, m128_const_64( 0x01000f0e0d0c0b0a, \
|
||||
0x0908070605040302 ) )
|
||||
#define mm128_rol_1x16( v ) \
|
||||
_mm_shuffle_epi8( v, m128_const_64( 0x0d0c0b0a09080706, \
|
||||
0x0504030201000f0e ) )
|
||||
#define mm128_ror_1x8( v ) \
|
||||
_mm_shuffle_epi8( v, m128_const_64( 0x000f0e0d0c0b0a09, \
|
||||
0x0807060504030201 ) )
|
||||
#define mm128_rol_1x8( v ) \
|
||||
_mm_shuffle_epi8( v, m128_const_64( 0x0e0d0c0b0a090807, \
|
||||
0x060504030201000f ) )
|
||||
#else // SSE2
|
||||
|
||||
#define mm128_ror_1x16( v ) \
|
||||
_mm_or_si128( _mm_srli_si128( v, 2 ), _mm_slli_si128( v, 14 ) )
|
||||
|
||||
#define mm128_rol_1x16( v ) \
|
||||
_mm_or_si128( _mm_slli_si128( v, 2 ), _mm_srli_si128( v, 14 ) )
|
||||
|
||||
#define mm128_ror_1x8( v ) \
|
||||
_mm_or_si128( _mm_srli_si128( v, 1 ), _mm_slli_si128( v, 15 ) )
|
||||
|
||||
#define mm128_rol_1x8( v ) \
|
||||
_mm_or_si128( _mm_slli_si128( v, 1 ), _mm_srli_si128( v, 15 ) )
|
||||
|
||||
#endif // SSE3 else SSE2
|
||||
*/
|
||||
|
||||
|
||||
// Invert vector: {3,2,1,0} -> {0,1,2,3}
|
||||
#define mm128_invert_32( v ) _mm_shuffle_epi32( v, 0x1b )
|
||||
|
||||
|
@@ -26,8 +26,6 @@
|
||||
#define mm256_concat_128( hi, lo ) \
|
||||
_mm256_inserti128_si256( _mm256_castsi128_si256( lo ), hi, 1 )
|
||||
|
||||
#define m256_const1_128( v ) \
|
||||
_mm256_broadcastsi128_si256( v )
|
||||
|
||||
// Equavalent of set, move 64 bit integer constants to respective 64 bit
|
||||
// elements.
|
||||
@@ -144,10 +142,11 @@ do { \
|
||||
|
||||
// Parallel AES, for when x is expected to be in a 256 bit register.
|
||||
// Use same 128 bit key.
|
||||
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
//#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if 0
|
||||
#define mm256_aesenc_2x128( x, k ) \
|
||||
_mm256_aesenc_epi128( x, m256_const1_128(k ) )
|
||||
_mm256_aesenc_epi128( x, k )
|
||||
|
||||
#else
|
||||
|
||||
|
@@ -56,15 +56,15 @@
|
||||
// If an expensive constant is to be reused in the same function it should
|
||||
// be declared as a local variable defined once and reused.
|
||||
//
|
||||
// Permutations cab be very exppensive if they use a vector control index,
|
||||
// Permutations can be very expensive if they use a vector control index,
|
||||
// even if the permutation itself is quite efficient.
|
||||
// The index is essentially a constant with all the baggage that brings.
|
||||
// The same rules apply, if an index is to be reused it should be defined
|
||||
// as a local. This applies specifically to bswap operations.
|
||||
//
|
||||
// Additionally, permutations using smaller vectors can be more efficient
|
||||
// if the permutation doesn't cross lane boundaries ,typically 128 bits,
|
||||
// ans the smnaller vector can use an imm comtrol.
|
||||
// if the permutation doesn't cross lane boundaries, typically 128 bits,
|
||||
// and the smnaller vector can use an imm comtrol.
|
||||
//
|
||||
// If the permutation doesn't cross lane boundaries a shuffle instructions
|
||||
// can be used with imm control instead of permute.
|
||||
@@ -182,7 +182,10 @@ static inline __m512i m512_const4_64( const uint64_t i3, const uint64_t i2,
|
||||
//
|
||||
// Basic operations without SIMD equivalent
|
||||
|
||||
// ~x
|
||||
#define mm512_not( x ) _mm512_xor_si512( x, m512_neg1 )
|
||||
|
||||
// -x
|
||||
#define mm512_negate_64( x ) _mm512_sub_epi64( m512_zero, x )
|
||||
#define mm512_negate_32( x ) _mm512_sub_epi32( m512_zero, x )
|
||||
#define mm512_negate_16( x ) _mm512_sub_epi16( m512_zero, x )
|
||||
@@ -443,20 +446,13 @@ static inline void memcpy_512( __m512i *dst, const __m512i *src, const int n )
|
||||
//
|
||||
// Rotate elements within 256 bit lanes of 512 bit vector.
|
||||
|
||||
// Rename these for consistency. Element size is always last.
|
||||
// mm<vectorsize>_<op><lanesize>_<elementsize>
|
||||
|
||||
|
||||
// Swap hi & lo 128 bits in each 256 bit lane
|
||||
|
||||
#define mm512_swap256_128( v ) _mm512_permutex_epi64( v, 0x4e )
|
||||
|
||||
// Rotate 256 bit lanes by one 64 bit element
|
||||
|
||||
#define mm512_ror256_64( v ) _mm512_permutex_epi64( v, 0x39 )
|
||||
#define mm512_rol256_64( v ) _mm512_permutex_epi64( v, 0x93 )
|
||||
|
||||
|
||||
// Rotate 256 bit lanes by one 32 bit element
|
||||
|
||||
#define mm512_ror256_32( v ) \
|
||||
|
25
sysinfos.c
25
sysinfos.c
@@ -331,16 +331,20 @@ static inline void cpu_getmodelid(char *outbuf, size_t maxsz)
|
||||
// Feature flags
|
||||
|
||||
// CPU_INFO ECX
|
||||
#define XSAVE_Flag (1<<26)
|
||||
#define OSXSAVE_Flag (1<<27)
|
||||
#define AVX_Flag (1<<28)
|
||||
#define SSE3_Flag 1
|
||||
#define SSSE3_Flag (1<< 9)
|
||||
#define XOP_Flag (1<<11)
|
||||
#define FMA3_Flag (1<<12)
|
||||
#define AES_Flag (1<<25)
|
||||
#define SSE41_Flag (1<<19)
|
||||
#define SSE42_Flag (1<<20)
|
||||
#define AES_Flag (1<<25)
|
||||
#define XSAVE_Flag (1<<26)
|
||||
#define OSXSAVE_Flag (1<<27)
|
||||
#define AVX_Flag (1<<28)
|
||||
|
||||
// CPU_INFO EDX
|
||||
#define SSE_Flag (1<<25) // EDX
|
||||
#define SSE_Flag (1<<25)
|
||||
#define SSE2_Flag (1<<26)
|
||||
|
||||
// EXTENDED_FEATURES EBX
|
||||
@@ -359,8 +363,8 @@ static inline void cpu_getmodelid(char *outbuf, size_t maxsz)
|
||||
|
||||
// Use this to detect presence of feature
|
||||
#define AVX_mask (AVX_Flag|XSAVE_Flag|OSXSAVE_Flag)
|
||||
#define FMA3_mask (FMA3_Flag|AVX_mask)
|
||||
#define AVX512_mask (AVX512VL_Flag|AVX512BW_Flag|AVX512DQ_Flag|AVX512F_Flag)
|
||||
#define FMA3_mask (FMA3_Flag|AVX_mask)
|
||||
#define AVX512_mask (AVX512VL_Flag|AVX512BW_Flag|AVX512DQ_Flag|AVX512F_Flag)
|
||||
|
||||
static inline bool has_sha()
|
||||
{
|
||||
@@ -476,6 +480,15 @@ static inline bool has_avx512()
|
||||
#endif
|
||||
}
|
||||
|
||||
// AMD Zen3 added support for 256 bit VAES without requiring AVX512.
|
||||
// The original Intel spec requires AVX512F to support 512 bit VAES and
|
||||
// requires AVX512VL to support 256 bit VAES.
|
||||
// cpuminer-opt only uses VAES512, simply testing the VAES bit is sufficient.
|
||||
// However, proper detection of VAES512 and VAES256 requires more work:
|
||||
// VAES512 = VAES && AVX512F (may not support VAES256)
|
||||
// VAES256 = AVX512VL ? VAES : ( AVX && VAES ) (may not support VAES512)
|
||||
// VAES = VAES && AVX512F && AVX512VL (supports both)
|
||||
|
||||
static inline bool has_vaes()
|
||||
{
|
||||
#ifdef __arm__
|
||||
|
11
util.c
11
util.c
@@ -1485,9 +1485,12 @@ static bool stratum_parse_extranonce(struct stratum_ctx *sctx, json_t *params, i
|
||||
sctx->xnonce2_size = xn2_size;
|
||||
pthread_mutex_unlock(&sctx->work_lock);
|
||||
|
||||
if (pndx == 0 && opt_debug) /* pool dynamic change */
|
||||
applog(LOG_DEBUG, "Stratum set nonce %s with extranonce2 size=%d",
|
||||
xnonce1, xn2_size);
|
||||
if ( !opt_quiet ) /* pool dynamic change */
|
||||
applog( LOG_INFO, "Stratum extranonce1= %s, extranonce2 size= %d",
|
||||
xnonce1, xn2_size);
|
||||
// if (pndx == 0 && opt_debug)
|
||||
// applog(LOG_DEBUG, "Stratum set nonce %s with extranonce2 size=%d",
|
||||
// xnonce1, xn2_size);
|
||||
|
||||
return true;
|
||||
out:
|
||||
@@ -1581,8 +1584,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern bool opt_extranonce;
|
||||
|
||||
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass)
|
||||
{
|
||||
json_t *val = NULL, *res_val, *err_val;
|
||||
|
Reference in New Issue
Block a user