mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.9.7
This commit is contained in:
@@ -38,6 +38,26 @@ supported.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.9.7
|
||||
|
||||
Command line option changes:
|
||||
|
||||
"-R" is no longer used as a shortcut for "--retry-pause", users must
|
||||
use the long option.
|
||||
|
||||
New options:
|
||||
|
||||
-N, --param-n: set the N parameter for yescrypt, yespower or scrypt algos
|
||||
-R, --param-r: set the R parameter for yescrypt or yespower algos, scrypt is
|
||||
hardcoded with R=1
|
||||
-K, --param-key: set the client key/pers parameter for yescrypt/yespower algos.
|
||||
|
||||
These options can be used to mine yescrypt or yespower variations using
|
||||
the generic yescrypt or yespower algo name and specifying the parameters
|
||||
manually. They can even be used to mine variations that aren't formally
|
||||
supported by a unique algo name. Existing algos can continue to be mined
|
||||
using their original name without parameters.
|
||||
|
||||
v3.9.6.2
|
||||
|
||||
New algo blake2b.
|
||||
|
@@ -786,10 +786,14 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
||||
gate->set_target = (void*)&scrypt_set_target;
|
||||
gate->get_max64 = (void*)&scrypt_get_max64;
|
||||
|
||||
if ( !opt_scrypt_n )
|
||||
if ( !opt_param_n )
|
||||
{
|
||||
opt_param_n = 1024;
|
||||
scratchbuf_size = 1024;
|
||||
}
|
||||
else
|
||||
scratchbuf_size = opt_scrypt_n;
|
||||
scratchbuf_size = opt_param_n;
|
||||
applog(LOG_INFO,"Scrypt paramaters: N= %d, R= 1.", opt_param_n );
|
||||
return true;
|
||||
};
|
||||
|
||||
|
@@ -55,6 +55,7 @@ typedef uint32_t scrypt_mix_word_t;
|
||||
#include "scrypt-jane-romix-template.h"
|
||||
#endif
|
||||
|
||||
|
||||
/* cpu agnostic */
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix_basic
|
||||
#define SCRYPT_MIX_FN chacha_core_basic
|
||||
|
@@ -1,9 +1,11 @@
|
||||
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX)
|
||||
|
||||
/*
|
||||
#if defined(SCRYPT_CHOOSE_COMPILETIME)
|
||||
#undef SCRYPT_ROMIX_FN
|
||||
#define SCRYPT_ROMIX_FN scrypt_ROMix
|
||||
#endif
|
||||
*/
|
||||
|
||||
#undef SCRYPT_HAVE_ROMIX
|
||||
#define SCRYPT_HAVE_ROMIX
|
||||
|
@@ -244,20 +244,20 @@ bool register_scryptjane_algo( algo_gate_t* gate )
|
||||
gate->get_max64 = (void*)&get_max64_0x40LL;
|
||||
|
||||
// figure out if arg in N or Nfactor
|
||||
if ( !opt_scrypt_n )
|
||||
if ( !opt_param_n )
|
||||
{
|
||||
applog( LOG_ERR, "The N factor must be specified in the form algo:nf");
|
||||
return false;
|
||||
}
|
||||
else if ( opt_scrypt_n < 32 )
|
||||
else if ( opt_param_n < 32 )
|
||||
{
|
||||
// arg is Nfactor, calculate N
|
||||
sj_N = 1 << ( opt_scrypt_n + 1 );
|
||||
sj_N = 1 << ( opt_param_n + 1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
// arg is N
|
||||
sj_N = opt_scrypt_n;
|
||||
sj_N = opt_param_n;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@@ -438,11 +438,32 @@ bool register_yescrypt_algo( algo_gate_t* gate )
|
||||
{
|
||||
yescrypt_gate_base( gate );
|
||||
gate->get_max64 = (void*)&yescrypt_get_max64;
|
||||
|
||||
if ( opt_param_n ) YESCRYPT_N = opt_param_n;
|
||||
else YESCRYPT_N = 2048;
|
||||
|
||||
if ( opt_param_r ) YESCRYPT_R = opt_param_r;
|
||||
else YESCRYPT_R = 8;
|
||||
|
||||
if ( opt_param_key )
|
||||
{
|
||||
yescrypt_client_key = opt_param_key;
|
||||
yescrypt_client_key_len = strlen( opt_param_key );
|
||||
}
|
||||
else
|
||||
{
|
||||
yescrypt_client_key = NULL;
|
||||
yescrypt_client_key_len = 0;
|
||||
YESCRYPT_N = 2048;
|
||||
YESCRYPT_R = 8;
|
||||
}
|
||||
|
||||
YESCRYPT_P = 1;
|
||||
|
||||
applog(LOG_NOTICE,"Yescrypt parameters: N= %d, R= %d.", YESCRYPT_N,
|
||||
YESCRYPT_R );
|
||||
if ( yescrypt_client_key )
|
||||
applog(LOG_NOTICE,"Key= ""%s"", len= %d.\n", yescrypt_client_key,
|
||||
yescrypt_client_key_len );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -78,10 +78,30 @@ int64_t yespower_get_max64()
|
||||
bool register_yespower_algo( algo_gate_t* gate )
|
||||
{
|
||||
yespower_params.version = YESPOWER_1_0;
|
||||
yespower_params.N = 2048;
|
||||
yespower_params.r = 32;
|
||||
|
||||
if ( opt_param_n ) yespower_params.N = opt_param_n;
|
||||
else yespower_params.N = 2048;
|
||||
|
||||
if ( opt_param_r ) yespower_params.r = opt_param_r;
|
||||
else yespower_params.r = 32;
|
||||
|
||||
if ( opt_param_key )
|
||||
{
|
||||
yespower_params.pers = opt_param_key;
|
||||
yespower_params.perslen = strlen( opt_param_key );
|
||||
}
|
||||
else
|
||||
{
|
||||
yespower_params.pers = NULL;
|
||||
yespower_params.perslen = 0;
|
||||
}
|
||||
|
||||
applog(LOG_NOTICE,"Yespower parameters: N= %d, R= %d.", yespower_params.N,
|
||||
yespower_params.r );
|
||||
if ( yespower_params.pers )
|
||||
applog(LOG_NOTICE,"Key= ""%s"", len= %d.\n", yespower_params.pers,
|
||||
(int)yespower_params.perslen );
|
||||
|
||||
gate->optimizations = SSE2_OPT;
|
||||
gate->get_max64 = (void*)&yespower_get_max64;
|
||||
gate->scanhash = (void*)&scanhash_yespower;
|
||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.6.2.
|
||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.7.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='cpuminer-opt'
|
||||
PACKAGE_TARNAME='cpuminer-opt'
|
||||
PACKAGE_VERSION='3.9.6.2'
|
||||
PACKAGE_STRING='cpuminer-opt 3.9.6.2'
|
||||
PACKAGE_VERSION='3.9.7'
|
||||
PACKAGE_STRING='cpuminer-opt 3.9.7'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures cpuminer-opt 3.9.6.2 to adapt to many kinds of systems.
|
||||
\`configure' configures cpuminer-opt 3.9.7 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1404,7 +1404,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.9.6.2:";;
|
||||
short | recursive ) echo "Configuration of cpuminer-opt 3.9.7:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1509,7 +1509,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
cpuminer-opt configure 3.9.6.2
|
||||
cpuminer-opt configure 3.9.7
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by cpuminer-opt $as_me 3.9.6.2, which was
|
||||
It was created by cpuminer-opt $as_me 3.9.7, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2993,7 +2993,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='cpuminer-opt'
|
||||
VERSION='3.9.6.2'
|
||||
VERSION='3.9.7'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by cpuminer-opt $as_me 3.9.6.2, which was
|
||||
This file was extended by cpuminer-opt $as_me 3.9.7, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
cpuminer-opt config.status 3.9.6.2
|
||||
cpuminer-opt config.status 3.9.7
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
AC_INIT([cpuminer-opt], [3.9.6.2])
|
||||
AC_INIT([cpuminer-opt], [3.9.7])
|
||||
|
||||
AC_PREREQ([2.59c])
|
||||
AC_CANONICAL_SYSTEM
|
||||
|
25
cpu-miner.c
25
cpu-miner.c
@@ -102,7 +102,9 @@ int opt_timeout = 300;
|
||||
static int opt_scantime = 5;
|
||||
//static const bool opt_time = true;
|
||||
enum algos opt_algo = ALGO_NULL;
|
||||
int opt_scrypt_n = 0;
|
||||
char* opt_param_key = NULL;
|
||||
int opt_param_n = 0;
|
||||
int opt_param_r = 0;
|
||||
int opt_pluck_n = 128;
|
||||
int opt_n_threads = 0;
|
||||
// Windows doesn't support 128 bit affinity mask.
|
||||
@@ -176,7 +178,7 @@ static char const short_options[] =
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
"S"
|
||||
#endif
|
||||
"a:b:Bc:CDf:hm:n:p:Px:qr:R:s:t:T:o:u:O:V";
|
||||
"a:b:Bc:CDf:hK:m:n:N:p:Px:qr:R:s:t:T:o:u:O:V";
|
||||
|
||||
static struct work g_work __attribute__ ((aligned (64))) = {{ 0 }};
|
||||
//static struct work tmp_work;
|
||||
@@ -2860,7 +2862,7 @@ void parse_arg(int key, char *arg )
|
||||
if (*ep || v < 2)
|
||||
continue;
|
||||
opt_algo = (enum algos) i;
|
||||
opt_scrypt_n = v;
|
||||
opt_param_n = v;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -2943,7 +2945,9 @@ void parse_arg(int key, char *arg )
|
||||
show_usage_and_exit(1);
|
||||
opt_retries = v;
|
||||
break;
|
||||
case 'R':
|
||||
// case 'R':
|
||||
// applog(LOG_WARNING,"\n-R is no longer valid, use --retry-pause instead.");
|
||||
case 1025:
|
||||
v = atoi(arg);
|
||||
if (v < 1 || v > 9999) /* sanity check */
|
||||
show_usage_and_exit(1);
|
||||
@@ -3153,6 +3157,18 @@ void parse_arg(int key, char *arg )
|
||||
show_usage_and_exit(1);
|
||||
opt_priority = v;
|
||||
break;
|
||||
case 'N': // N parameter for various scrypt algos
|
||||
d = atoi( arg );
|
||||
opt_param_n = d;
|
||||
break;
|
||||
case 'R': // R parameter for various scrypt algos
|
||||
d = atoi( arg );
|
||||
opt_param_r = d;
|
||||
break;
|
||||
case 'K': // Client key for various algos
|
||||
free( opt_param_key );
|
||||
opt_param_key = strdup( arg );
|
||||
break;
|
||||
case 1060: // max-temp
|
||||
d = atof(arg);
|
||||
opt_max_temp = d;
|
||||
@@ -3178,6 +3194,7 @@ void parse_arg(int key, char *arg )
|
||||
show_version_and_exit();
|
||||
case 'h':
|
||||
show_usage_and_exit(0);
|
||||
|
||||
default:
|
||||
show_usage_and_exit(1);
|
||||
}
|
||||
|
15
miner.h
15
miner.h
@@ -729,7 +729,9 @@ extern double stratum_diff;
|
||||
extern double net_diff;
|
||||
extern double net_hashrate;
|
||||
extern int opt_pluck_n;
|
||||
extern int opt_scrypt_n;
|
||||
extern int opt_param_n;
|
||||
extern int opt_param_r;
|
||||
extern char* opt_param_key;
|
||||
extern double opt_diff_factor;
|
||||
extern bool opt_randomize;
|
||||
extern bool allow_mininginfo;
|
||||
@@ -843,6 +845,9 @@ Options:\n\
|
||||
yespower Cryply\n\
|
||||
yespowerr16 Yenten (YTN)\n\
|
||||
zr5 Ziftr\n\
|
||||
-N, --param-n N parameter for scrypt based algos\n\
|
||||
-R, --param-r R parameter for scrypt based algos\n\
|
||||
-K, --param-key Key parameter for algos that use it\n\
|
||||
-o, --url=URL URL of mining server\n\
|
||||
-O, --userpass=U:P username:password pair for mining server\n\
|
||||
-u, --user=USERNAME username for mining server\n\
|
||||
@@ -852,7 +857,7 @@ Options:\n\
|
||||
-t, --threads=N number of miner threads (default: number of processors)\n\
|
||||
-r, --retries=N number of times to retry if a network call fails\n\
|
||||
(default: retry indefinitely)\n\
|
||||
-R, --retry-pause=N time to pause between retries, in seconds (default: 30)\n\
|
||||
--retry-pause=N time to pause between retries, in seconds (default: 30)\n\
|
||||
--time-limit=N maximum time [s] to mine before exiting the program.\n\
|
||||
-T, --timeout=N timeout for long poll and stratum (default: 300 seconds)\n\
|
||||
-s, --scantime=N upper bound on time spent scanning current work when\n\
|
||||
@@ -927,6 +932,7 @@ static struct option const options[] = {
|
||||
{ "hash-meter", 0, NULL, 1014 },
|
||||
{ "hide-diff", 0, NULL, 1013 },
|
||||
{ "help", 0, NULL, 'h' },
|
||||
{ "key", 1, NULL, 'K' },
|
||||
{ "no-gbt", 0, NULL, 1011 },
|
||||
{ "no-getwork", 0, NULL, 1010 },
|
||||
{ "no-longpoll", 0, NULL, 1003 },
|
||||
@@ -936,13 +942,16 @@ static struct option const options[] = {
|
||||
{ "max-temp", 1, NULL, 1060 },
|
||||
{ "max-diff", 1, NULL, 1061 },
|
||||
{ "max-rate", 1, NULL, 1062 },
|
||||
{ "param-key", 1, NULL, 'K' },
|
||||
{ "param-n", 1, NULL, 'N' },
|
||||
{ "param-r", 1, NULL, 'R' },
|
||||
{ "pass", 1, NULL, 'p' },
|
||||
{ "protocol", 0, NULL, 'P' },
|
||||
{ "protocol-dump", 0, NULL, 'P' },
|
||||
{ "proxy", 1, NULL, 'x' },
|
||||
{ "quiet", 0, NULL, 'q' },
|
||||
{ "retries", 1, NULL, 'r' },
|
||||
{ "retry-pause", 1, NULL, 'R' },
|
||||
{ "retry-pause", 1, NULL, 1025 },
|
||||
{ "randomize", 0, NULL, 1024 },
|
||||
{ "scantime", 1, NULL, 's' },
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
|
@@ -175,7 +175,6 @@
|
||||
|
||||
// 64 bit vectors
|
||||
#include "simd-utils/simd-64.h"
|
||||
//#include "simd-utils/intrlv-mmx.h"
|
||||
|
||||
#if defined(__SSE2__)
|
||||
|
||||
@@ -189,6 +188,8 @@
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
// Utilities that require AVX2 are defined in simd-256.h.
|
||||
|
||||
// Skylake-X has all these
|
||||
#if defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
|
@@ -477,13 +477,13 @@ static inline void mm256_bswap32_intrlv80_8x32( void *d, void *src )
|
||||
__m256i s0 = mm256_bswap_32( casti_m256i( src,0 ) );
|
||||
__m256i s1 = mm256_bswap_32( casti_m256i( src,1 ) );
|
||||
__m128i s2 = mm128_bswap_32( casti_m128i( src,4 ) );
|
||||
const __m256i zero = m256_zero;
|
||||
// const __m256i zero = m256_zero;
|
||||
const __m256i one = m256_one_32;
|
||||
const __m256i two = _mm256_add_epi32( one, one );
|
||||
const __m256i three = _mm256_add_epi32( two, one );
|
||||
const __m256i four = _mm256_add_epi32( two, two );
|
||||
|
||||
casti_m256i( d, 0 ) = _mm256_permutevar8x32_epi32( s0, zero );
|
||||
casti_m256i( d, 0 ) = _mm256_permutevar8x32_epi32( s0, m256_zero );
|
||||
casti_m256i( d, 1 ) = _mm256_permutevar8x32_epi32( s0, one );
|
||||
casti_m256i( d, 2 ) = _mm256_permutevar8x32_epi32( s0, two );
|
||||
casti_m256i( d, 3 ) = _mm256_permutevar8x32_epi32( s0, three );
|
||||
@@ -494,7 +494,7 @@ static inline void mm256_bswap32_intrlv80_8x32( void *d, void *src )
|
||||
_mm256_add_epi32( four, two ) );
|
||||
casti_m256i( d, 7 ) = _mm256_permutevar8x32_epi32( s0,
|
||||
_mm256_add_epi32( four, three ) );
|
||||
casti_m256i( d, 8 ) = _mm256_permutevar8x32_epi32( s1, zero );
|
||||
casti_m256i( d, 8 ) = _mm256_permutevar8x32_epi32( s1, m256_zero );
|
||||
casti_m256i( d, 9 ) = _mm256_permutevar8x32_epi32( s1, one );
|
||||
casti_m256i( d,10 ) = _mm256_permutevar8x32_epi32( s1, two );
|
||||
casti_m256i( d,11 ) = _mm256_permutevar8x32_epi32( s1, three );
|
||||
@@ -506,7 +506,7 @@ static inline void mm256_bswap32_intrlv80_8x32( void *d, void *src )
|
||||
casti_m256i( d,15 ) = _mm256_permutevar8x32_epi32( s1,
|
||||
_mm256_add_epi32( four, three ) );
|
||||
casti_m256i( d,16 ) = _mm256_permutevar8x32_epi32(
|
||||
_mm256_castsi128_si256( s2 ), zero );
|
||||
_mm256_castsi128_si256( s2 ), m256_zero );
|
||||
casti_m256i( d,17 ) = _mm256_permutevar8x32_epi32(
|
||||
_mm256_castsi128_si256( s2 ), one );
|
||||
casti_m256i( d,18 ) = _mm256_permutevar8x32_epi32(
|
||||
@@ -874,17 +874,6 @@ static inline void extr_lane_4x64( void *d, const void *s,
|
||||
((uint64_t*)d)[ 5] = ((uint64_t*)s)[ lane+20 ];
|
||||
((uint64_t*)d)[ 6] = ((uint64_t*)s)[ lane+24 ];
|
||||
((uint64_t*)d)[ 7] = ((uint64_t*)s)[ lane+28 ];
|
||||
/*
|
||||
if ( bit_len <= 256 ) return;
|
||||
((uint64_t*)d)[ 8] = ((uint64_t*)s)[ lane+32 ];
|
||||
((uint64_t*)d)[ 9] = ((uint64_t*)s)[ lane+36 ];
|
||||
((uint64_t*)d)[10] = ((uint64_t*)s)[ lane+40 ];
|
||||
((uint64_t*)d)[11] = ((uint64_t*)s)[ lane+44 ];
|
||||
((uint64_t*)d)[12] = ((uint64_t*)s)[ lane+48 ];
|
||||
((uint64_t*)d)[13] = ((uint64_t*)s)[ lane+52 ];
|
||||
((uint64_t*)d)[14] = ((uint64_t*)s)[ lane+56 ];
|
||||
((uint64_t*)d)[15] = ((uint64_t*)s)[ lane+60 ];
|
||||
*/
|
||||
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
@@ -991,17 +980,6 @@ static inline void extr_lane_8x64( void *d, const void *s,
|
||||
((uint64_t*)d)[ 5] = ((uint64_t*)s)[ lane+ 40 ];
|
||||
((uint64_t*)d)[ 6] = ((uint64_t*)s)[ lane+ 48 ];
|
||||
((uint64_t*)d)[ 7] = ((uint64_t*)s)[ lane+ 56 ];
|
||||
/*
|
||||
if ( bit_len <= 256 ) return;
|
||||
((uint64_t*)d)[ 8] = ((uint64_t*)s)[ lane+ 64 ];
|
||||
((uint64_t*)d)[ 9] = ((uint64_t*)s)[ lane+ 72 ];
|
||||
((uint64_t*)d)[10] = ((uint64_t*)s)[ lane+ 80 ];
|
||||
((uint64_t*)d)[11] = ((uint64_t*)s)[ lane+ 88 ];
|
||||
((uint64_t*)d)[12] = ((uint64_t*)s)[ lane+ 96 ];
|
||||
((uint64_t*)d)[13] = ((uint64_t*)s)[ lane+104 ];
|
||||
((uint64_t*)d)[14] = ((uint64_t*)s)[ lane+112 ];
|
||||
((uint64_t*)d)[15] = ((uint64_t*)s)[ lane+120 ];
|
||||
*/
|
||||
}
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__)
|
||||
|
@@ -565,57 +565,73 @@ do { \
|
||||
|
||||
#define mm128_ror1x64_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_srli_si128( v1, 8 ) | _mm_slli_si128( v2, 8 ); \
|
||||
v2 = _mm_srli_si128( v2, 8 ) | _mm_slli_si128( v1, 8 ); \
|
||||
__m128i t = _mm_or_si128( _mm_srli_si128( v1, 8 ), \
|
||||
_mm_slli_si128( v2, 8 ) ); \
|
||||
v2 = _mm_or_si128( _mm_srli_si128( v2, 8 ), \
|
||||
_mm_slli_si128( v1, 8 ) ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_rol1x64_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_slli_si128( v1, 8 ) | _mm_srli_si128( v2, 8 ); \
|
||||
v2 = _mm_slli_si128( v2, 8 ) | _mm_srli_si128( v1, 8 ); \
|
||||
__m128i t = _mm_or_si128( _mm_slli_si128( v1, 8 ), \
|
||||
_mm_srli_si128( v2, 8 ) ); \
|
||||
v2 = _mm_or_si128( _mm_slli_si128( v2, 8 ), \
|
||||
_mm_srli_si128( v1, 8 ) ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_ror1x32_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_srli_si128( v1, 4 ) | _mm_slli_si128( v2, 12 ); \
|
||||
v2 = _mm_srli_si128( v2, 4 ) | _mm_slli_si128( v1, 12 ); \
|
||||
__m128i t = _mm_or_si128( _mm_srli_si128( v1, 4 ), \
|
||||
_mm_slli_si128( v2, 12 ) ); \
|
||||
v2 = _mm_or_si128( _mm_srli_si128( v2, 4 ), \
|
||||
_mm_slli_si128( v1, 12 ) ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_rol1x32_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_slli_si128( v1, 4 ) | _mm_srli_si128( v2, 12 ); \
|
||||
v2 = _mm_slli_si128( v2, 4 ) | _mm_srli_si128( v1, 12 ); \
|
||||
__m128i t = _mm_or_si128( _mm_slli_si128( v1, 4 ), \
|
||||
_mm_srli_si128( v2, 12 ) ); \
|
||||
v2 = _mm_or_si128( _mm_slli_si128( v2, 4 ), \
|
||||
_mm_srli_si128( v1, 12 ) ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_ror1x16_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_srli_si128( v1, 2 ) | _mm_slli_si128( v2, 14 ); \
|
||||
v2 = _mm_srli_si128( v2, 2 ) | _mm_slli_si128( v1, 14 ); \
|
||||
__m128i t = _mm_or_si128( _mm_srli_si128( v1, 2 ), \
|
||||
_mm_slli_si128( v2, 14 ) ); \
|
||||
v2 = _mm_or_si128( _mm_srli_si128( v2, 2 ), \
|
||||
_mm_slli_si128( v1, 14 ) ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_rol1x16_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_slli_si128( v1, 2 ) | _mm_srli_si128( v2, 14 ); \
|
||||
v2 = _mm_slli_si128( v2, 2 ) | _mm_srli_si128( v1, 14 ); \
|
||||
__m128i t = _mm_or_si128( _mm_slli_si128( v1, 2 ), \
|
||||
_mm_srli_si128( v2, 14 ) ); \
|
||||
v2 = _mm_or_si128( _mm_slli_si128( v2, 2 ), \
|
||||
_mm_srli_si128( v1, 14 ) ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_ror1x8_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_srli_si128( v1, 1 ) | _mm_slli_si128( v2, 15 ); \
|
||||
v2 = _mm_srli_si128( v2, 1 ) | _mm_slli_si128( v1, 15 ); \
|
||||
__m128i t = _mm_or_si128( _mm_srli_si128( v1, 1 ), \
|
||||
_mm_slli_si128( v2, 15 ) ); \
|
||||
v2 = _mm_or_si128( _mm_srli_si128( v2, 1 ), \
|
||||
_mm_slli_si128( v1, 15 ) ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
#define mm128_rol1x8_256( v1, v2 ) \
|
||||
do { \
|
||||
__m128i t = _mm_slli_si128( v1, 1 ) | _mm_srli_si128( v2, 15 ); \
|
||||
v2 = _mm_slli_si128( v2, 1 ) | _mm_srli_si128( v1, 15 ); \
|
||||
__m128i t = _mm_or_si128( _mm_slli_si128( v1, 1 ), \
|
||||
_mm_srli_si128( v2, 15 ) ); \
|
||||
v2 = _mm_or_si128( _mm_slli_si128( v2, 1 ), \
|
||||
_mm_srli_si128( v1, 15 ) ); \
|
||||
v1 = t; \
|
||||
} while(0)
|
||||
|
||||
|
@@ -33,7 +33,8 @@
|
||||
// cast all arguments as they're likely to be uint64_t
|
||||
|
||||
// Bitwise not: ~(a)
|
||||
#define mm64_not( a ) _mm_xor_si64( (__m64)a, m64_neg1 )
|
||||
//#define mm64_not( a ) _mm_xor_si64( (__m64)a, m64_neg1 )
|
||||
// Inverts every bit of a by taking ~ of the value as a uint64_t and casting
// the result to __m64. The expansion is fully parenthesized (the previous
// text was missing its final closing parenthesis and could not compile
// when used).
#define mm64_not( a ) ( (__m64)( ~( (uint64_t)(a) ) ) )
|
||||
|
||||
// Unary negate elements
|
||||
#define mm64_negate_32( v ) _mm_sub_pi32( m64_zero, (__m64)v )
|
||||
|
@@ -34,7 +34,7 @@
|
||||
(uint32_t)( ( (uint32_t)(x) << (c) ) | ( (uint32_t)(x) >> (32-(c)) ) )
|
||||
#define u16_ror_16( x, c ) \
|
||||
(uint16_t)( ( (uint16_t)(x) >> (c) ) | ( (uint16_t)(x) << (16-(c)) ) )
|
||||
#define u16rol_16( x, c ) \
|
||||
#define u16_rol_16( x, c ) \
|
||||
(uint16_t)( ( (uint16_t)(x) << (c) ) | ( (uint16_t)(x) >> (16-(c)) ) )
|
||||
#define u8_ror_8( x, c ) \
|
||||
(uint8_t) ( ( (uint8_t) (x) >> (c) ) | ( (uint8_t) (x) << ( 8-(c)) ) )
|
||||
|
Reference in New Issue
Block a user