Compare commits

...

3 Commits
v27.4 ... v25.2

Author SHA1 Message Date
Jay D Dee
1ed18bf22e v25.2 2025-01-12 18:58:21 -05:00
Jay D Dee
1d9341ee92 v25.1 2024-12-30 21:33:04 -05:00
Jay D Dee
a45a333b40 v24.8 2024-12-25 23:12:29 -05:00
48 changed files with 1422 additions and 4185 deletions

View File

@@ -1,19 +1,39 @@
if WANT_JANSSON
JANSSON_INCLUDES= -I$(top_srcdir)/compat/jansson
if HAVE_APPLE
# MacOS uses Homebrew to install needed packages but they aren't linked for
# the jansson test in configure. Ignore the failed test & link them now,
# different path for different CPU arch.
if ARCH_ARM64
EXTRA_INCLUDES = -I/opt/homebrew/include
EXTRA_LIBS = -L/opt/homebrew/lib
else
JANSSON_INCLUDES=
EXTRA_INCLUDES = -I/usr/local/include
EXTRA_LIBS = -L/usr/local/lib
endif
EXTRA_DIST = example-cfg.json nomacro.pl
else
SUBDIRS = compat
if WANT_JANSSON
# Can't find jansson libraries, compile the included source code.
EXTRA_INCLUDES = -I$(top_srcdir)/compat/jansson
EXTRA_LIBS = -L$(top_srcdir)/compat/jansson
else
EXTRA_INCLUDES =
EXTRA_LIBS =
endif
ALL_INCLUDES = @PTHREAD_FLAGS@ -fno-strict-aliasing $(JANSSON_INCLUDES) -I.
endif
bin_PROGRAMS = cpuminer
EXTRA_DIST = example-cfg.json nomacro.pl
dist_man_MANS = cpuminer.1
SUBDIRS = compat
ALL_INCLUDES = @PTHREAD_FLAGS@ -fno-strict-aliasing $(EXTRA_INCLUDES) -I.
bin_PROGRAMS = cpuminer
dist_man_MANS = cpuminer.1
cpuminer_SOURCES = \
dummy.cpp \
@@ -166,8 +186,6 @@ cpuminer_SOURCES = \
algo/shavite/sph-shavite-aesni.c \
algo/shavite/shavite-hash-2way.c \
algo/shavite/shavite-hash-4way.c \
algo/simd/nist.c \
algo/simd/vector.c \
algo/simd/sph_simd.c \
algo/simd/simd-hash-2way.c \
algo/skein/sph_skein.c \
@@ -274,25 +292,29 @@ cpuminer_SOURCES = \
algo/yespower/yespower-opt.c \
algo/yespower/yespower-ref.c \
algo/yespower/yespower-blake2b-ref.c
disable_flags =
if USE_ASM
cpuminer_SOURCES += asm/neoscrypt_asm.S
else
disable_flags += -DNOASM
endif
if HAVE_WINDOWS
cpuminer_SOURCES += compat/winansi.c
endif
cpuminer_LDFLAGS = @LDFLAGS@
cpuminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ -lgmp
if USE_ASM
disable_flags =
cpuminer_SOURCES += asm/neoscrypt_asm.S
else
disable_flags += -DNOASM
endif
cpuminer_LDFLAGS = @LDFLAGS@
cpuminer_LDADD = $(EXTRA_LIBS) @LIBCURL@ -ljansson @PTHREAD_LIBS@ @WS2_LIBS@ -lgmp
cpuminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ $(ALL_INCLUDES)
cpuminer_CFLAGS = -Wno-pointer-sign -Wno-pointer-to-int-cast $(disable_flags)
cpuminer_CFLAGS = -Wno-pointer-sign -Wno-pointer-to-int-cast $(disable_flags)
if ARCH_ARM64
cpuminer_CFLAGS += -flax-vector-conversions
endif
if HAVE_WINDOWS
# use to profile an object
# gprof_cflags = -pg -g3
# cpuminer_LDFLAGS += -pg
@@ -306,5 +328,4 @@ cpuminer-neoscrypt.o: neoscrypt.c
@echo "CUSTOM ${@}: ${filter %.o,${^}} ${filter %.c,${^}}"
$(CC) $(common_ccflags) -g -O3 $(gprof_cflags) -MT $@ -MD -MP -c -o $@ $<
endif

View File

@@ -36,34 +36,18 @@ for compile instructions.
Requirements
------------
1. A x86_64 architecture CPU with a minimum of SSE2 support. This includes
Intel Core2 and newer and AMD equivalents. Further optimizations are available
on some algoritms for CPUs with AES, AVX, AVX2, SHA, AVX512 and VAES.
32 bit CPUs are not supported.
Other CPU architectures such as ARM, Raspberry Pi, RISC-V, Xeon Phi, etc,
are not supported.
1. A 64 bit CPU supporting x86_64 (Intel or AMD) or aarch64 (ARM).
x86_64 requires SSE2, aarch64 requires armv8 & NEON.
Mobile CPUs like laptop computers are not recommended because they aren't
designed for extreme heat of operating at full load for extended periods of
time.
Older CPUs and ARM architecture may be supported by cpuminer-multi by TPruvot.
2. 64 bit Linux or Windows OS. Ubuntu and Fedora based distributions,
including Mint and Centos, are known to work and have all dependencies
in their repositories. Others may work but may require more effort. Older
versions such as Centos 6 don't work due to missing features.
Windows 7 or newer is supported with mingw_w64 and msys or using the pre-built
binaries. WindowsXP 64 bit is YMMV.
FreeBSD is not actively tested but should work, YMMV.
MacOS, OSx and Android are not supported.
2. 64 bit operating system including Linux, Windows, MacOS, or BSD.
Android, IOS and alt OSs like Haiku & ReactOS are not supported.
3. Stratum pool supporting stratum+tcp:// or stratum+ssl:// protocols or
RPC getwork using http:// or https://.
GBT is YMMV.
RPC getblockte,plate using http:// or https://.
Supported Algorithms
--------------------

View File

@@ -75,6 +75,31 @@ If not what makes it happen or not happen?
Change Log
----------
v25.2
ARM: Fixed regression from v25.1 that could cause build fail.
BSD: FreeBSD is now supported. Other BSDs may also work.
MacOS: build with installed jansson library instead of compiling the included source code.
Windows: remove "_WIN32_WINNT=0x0601" which is a downgrade on Win11.
Changed build.sh shell from bash to sh.
v25.1
MacOS ARM64: m7m algo is now working.
MacOS ARM64: can now be compiled with GCC.
MacOS x86_64: is now working compiled with GCC.
Fixed some minor bugs & removed some obsolete code.
v24.8
ARM: Apple MacOS on M series CPU is now supported compiled from source
code, see Wiki for details.
ARM: Fix incorrect compiler version display when using clang.
build.sh can now be used to compile all targets, arm_build.sh & build_msys2.sh
have been removed.
Windows: MSys2 build now enables CPU groups by default, prebuilt binaries
continue to be compiled with CPU groups disabled.
v24.7
ARM: compile works for Windows using MSys2 & MingW, see wiki for details.

View File

@@ -161,29 +161,25 @@ keccak64_8way_core( keccak64_ctx_m512i *kc, const void *data, size_t len,
static void keccak64_8way_close( keccak64_ctx_m512i *kc, void *dst,
size_t byte_len, size_t lim )
{
unsigned eb;
union {
__m512i tmp[lim + 1];
uint64_t dummy; /* for alignment */
} u;
__m512i tmp[lim + 1] __attribute__ ((aligned (64)));
size_t j;
size_t m512_len = byte_len >> 3;
const unsigned eb = hard_coded_eb;
eb = hard_coded_eb;
if ( kc->ptr == (lim - 8) )
{
const uint64_t t = eb | 0x8000000000000000;
u.tmp[0] = _mm512_set1_epi64( t );
tmp[0] = _mm512_set1_epi64( t );
j = 8;
}
else
{
j = lim - kc->ptr;
u.tmp[0] = _mm512_set1_epi64( eb );
memset_zero_512( u.tmp + 1, (j>>3) - 2 );
u.tmp[ (j>>3) - 1] = _mm512_set1_epi64( 0x8000000000000000 );
tmp[0] = _mm512_set1_epi64( eb );
memset_zero_512( tmp + 1, (j>>3) - 2 );
tmp[ (j>>3) - 1] = _mm512_set1_epi64( 0x8000000000000000 );
}
keccak64_8way_core( kc, u.tmp, j, lim );
keccak64_8way_core( kc, tmp, j, lim );
/* Finalize the "lane complement" */
NOT64( kc->w[ 1], kc->w[ 1] );
NOT64( kc->w[ 2], kc->w[ 2] );
@@ -361,29 +357,25 @@ keccak64_core( keccak64_ctx_m256i *kc, const void *data, size_t len,
static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
size_t lim )
{
unsigned eb;
union {
__m256i tmp[lim + 1];
uint64_t dummy; /* for alignment */
} u;
__m256i tmp[lim + 1] __attribute__ ((aligned (32)));
size_t j;
size_t m256_len = byte_len >> 3;
const unsigned eb = hard_coded_eb;
eb = hard_coded_eb;
if ( kc->ptr == (lim - 8) )
{
const uint64_t t = eb | 0x8000000000000000;
u.tmp[0] = _mm256_set1_epi64x( t );
tmp[0] = _mm256_set1_epi64x( t );
j = 8;
}
else
{
j = lim - kc->ptr;
u.tmp[0] = _mm256_set1_epi64x( eb );
memset_zero_256( u.tmp + 1, (j>>3) - 2 );
u.tmp[ (j>>3) - 1] = _mm256_set1_epi64x( 0x8000000000000000 );
tmp[0] = _mm256_set1_epi64x( eb );
memset_zero_256( tmp + 1, (j>>3) - 2 );
tmp[ (j>>3) - 1] = _mm256_set1_epi64x( 0x8000000000000000 );
}
keccak64_core( kc, u.tmp, j, lim );
keccak64_core( kc, tmp, j, lim );
/* Finalize the "lane complement" */
NOT64( kc->w[ 1], kc->w[ 1] );
NOT64( kc->w[ 2], kc->w[ 2] );

View File

@@ -1,8 +1,6 @@
#include "cpuminer-config.h"
#include "algo-gate-api.h"
#if !defined(__APPLE__)
#include <gmp.h>
#include <stdbool.h>
#include <stdlib.h>
@@ -33,6 +31,7 @@ static inline double exp_n( double xt )
return exp( xt );
}
/*
static inline double exp_n2( double x1, double x2 )
{
double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8,
@@ -53,6 +52,7 @@ static inline double exp_n2( double x1, double x2 )
else if ( xt > p6 - 1.e-200 )
return 0.;
}
*/
double swit2_( double wvnmb )
{
@@ -298,14 +298,8 @@ int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
return 0;
}
#endif // not apple
bool register_m7m_algo( algo_gate_t *gate )
{
#if defined(__APPLE__)
applog( LOG_ERR, "M7M algo is not supported on MacOS");
return false;
#else
gate->optimizations = SHA256_OPT;
init_m7m_ctx();
gate->scanhash = (void*)&scanhash_m7m_hash;
@@ -315,6 +309,5 @@ bool register_m7m_algo( algo_gate_t *gate )
gate->set_work_data_endian = (void*)&set_work_data_big_endian;
opt_target_factor = 65536.0;
return true;
#endif
}

View File

@@ -11,7 +11,6 @@
#include "algo/luffa/luffa-hash-2way.h"
#include "algo/cubehash/cube-hash-2way.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/nist.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/shavite/shavite-hash-2way.h"
#include "algo/simd/simd-hash-2way.h"
@@ -617,9 +616,9 @@ union _hmq1725_4way_context_overlay
cubehashParam cube;
cube_2way_context cube2;
sph_shavite512_context shavite;
hashState_sd sd;
simd512_context simd;
shavite512_2way_context shavite2;
simd_2way_context simd;
simd_2way_context simd_2way;
hashState_echo echo;
hamsi512_4way_context hamsi;
hashState_fugue fugue;
@@ -753,8 +752,8 @@ extern void hmq1725_4way_hash(void *state, const void *input)
shavite512_2way_full( &ctx.shavite2, vhashA, vhashA, 64 );
shavite512_2way_full( &ctx.shavite2, vhashB, vhashB, 64 );
simd512_2way_full( &ctx.simd, vhashA, vhashA, 64 );
simd512_2way_full( &ctx.simd, vhashB, vhashB, 64 );
simd512_2way_full( &ctx.simd_2way, vhashA, vhashA, 64 );
simd512_2way_full( &ctx.simd_2way, vhashB, vhashB, 64 );
rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
@@ -869,41 +868,25 @@ extern void hmq1725_4way_hash(void *state, const void *input)
echo_full( &ctx.echo, (BitSequence *)hash0, 512,
(const BitSequence *)hash0, 64 );
else
{
init_sd( &ctx.sd, 512 );
update_final_sd( &ctx.sd, (BitSequence *)hash0,
(const BitSequence *)hash0, 512 );
}
simd512_ctx( &ctx.simd, hash0, hash0, 64 );
if ( hash1[0] & mask ) //4
echo_full( &ctx.echo, (BitSequence *)hash1, 512,
(const BitSequence *)hash1, 64 );
else
{
init_sd( &ctx.sd, 512 );
update_final_sd( &ctx.sd, (BitSequence *)hash1,
(const BitSequence *)hash1, 512 );
}
simd512_ctx( &ctx.simd, hash1, hash1, 64 );
if ( hash2[0] & mask ) //4
echo_full( &ctx.echo, (BitSequence *)hash2, 512,
(const BitSequence *)hash2, 64 );
else
{
init_sd( &ctx.sd, 512 );
update_final_sd( &ctx.sd, (BitSequence *)hash2,
(const BitSequence *)hash2, 512 );
}
simd512_ctx( &ctx.simd, hash2, hash2, 64 );
if ( hash3[0] & mask ) //4
echo_full( &ctx.echo, (BitSequence *)hash3, 512,
(const BitSequence *)hash3, 64 );
else
{
init_sd( &ctx.sd, 512 );
update_final_sd( &ctx.sd, (BitSequence *)hash3,
(const BitSequence *)hash3, 512 );
}
simd512_ctx( &ctx.simd, hash3, hash3, 64 );
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );

View File

@@ -46,7 +46,7 @@
#endif
#ifdef __GNUC__
#if defined(NOASM) || defined(__arm__) || defined(__aarch64__)
#if defined(NOASM) || defined(__arm__) || defined(__aarch64__) || defined(__APPLE__)
#define ASM 0
#else
#define ASM 1

View File

@@ -1,472 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "nist.h"
#include "simd_iv.h"
/* #define NO_PRECOMPUTED_IV */
#if defined(__SSE2__) // || defined(__ARM_NEON)
/*
* Increase the counter.
*/
void IncreaseCounter(hashState_sd *state, DataLength databitlen) {
#ifdef HAS_64
state->count += databitlen;
#else
uint32_t old_count = state->count_low;
state->count_low += databitlen;
if (state->count_low < old_count)
state->count_high++;
#endif
}
/*
* Initialize the hashState_sd with a given IV.
* If the IV is NULL, initialize with zeros.
*/
int InitIV(hashState_sd *state, int hashbitlen, const u32 *IV) {
int n = 8;
state->hashbitlen = hashbitlen;
state->n_feistels = n;
state->blocksize = 128*8;
#ifdef HAS_64
state->count = 0;
#else
state->count_low = 0;
state->count_high = 0;
#endif
// state->buffer = malloc(16*n + 16);
/*
* Align the buffer to a 128 bit boundary.
*/
// state->buffer += ((unsigned char*)NULL - state->buffer)&15;
// state->A = malloc((4*n+4)*sizeof(u32));
/*
* Align the buffer to a 128 bit boundary.
*/
// state->A += ((u32*)NULL - state->A)&3;
state->B = state->A+n;
state->C = state->B+n;
state->D = state->C+n;
if (IV)
memcpy(state->A, IV, 4*n*sizeof(u32));
else
memset(state->A, 0, 4*n*sizeof(u32));
// free(state->buffer);
// free(state->A);
return 0;
}
/*
* Initialize the hashState_sd.
*/
int init_sd(hashState_sd *state, int hashbitlen) {
int r;
char *init;
#ifndef NO_PRECOMPUTED_IV
// if (hashbitlen == 224)
// r=InitIV(state, hashbitlen, IV_224);
// else if (hashbitlen == 256)
// r=InitIV(state, hashbitlen, IV_256);
// else if (hashbitlen == 384)
// r=InitIV(state, hashbitlen, IV_384);
// else
if (hashbitlen == 512)
r = InitIV(state, hashbitlen, IV_512);
else
#endif
{
/*
* Nonstandart length: IV is not precomputed.
*/
r=InitIV(state, hashbitlen, NULL);
if (r != 0)
return r;
init = malloc(state->blocksize);
memset(init, 0, state->blocksize);
#if defined __STDC__ && __STDC_VERSION__ >= 199901L
snprintf(init, state->blocksize, "SIMD-%i v1.1", hashbitlen);
#else
sprintf(init, "SIMD-%i v1.1", hashbitlen);
#endif
SIMD_Compress(state, (unsigned char*) init, 0);
free(init);
}
return r;
}
int update_sd( hashState_sd *state, const BitSequence *data,
DataLength databitlen )
{
unsigned current;
unsigned int bs = state->blocksize;
static int align = -1;
if (align == -1)
align = RequiredAlignment();
#ifdef HAS_64
current = state->count & (bs - 1);
#else
current = state->count_low & (bs - 1);
#endif
if ( current & 7 )
{
// The number of hashed bits is not a multiple of 8.
// Very painfull to implement and not required by the NIST API.
return 1;
}
while ( databitlen > 0 )
{
if ( IS_ALIGNED(data,align) && current == 0 && databitlen >= bs )
{
// We can hash the data directly from the input buffer.
SIMD_Compress(state, data, 0);
databitlen -= bs;
data += bs/8;
IncreaseCounter(state, bs);
}
else
{
// Copy a chunk of data to the buffer
unsigned int len = bs - current;
if ( databitlen < len )
{
memcpy( state->buffer+current/8, data, (databitlen+7)/8 );
IncreaseCounter( state, databitlen );
return 0;
}
else
{
memcpy( state->buffer+current/8, data, len/8 );
IncreaseCounter( state,len );
databitlen -= len;
data += len/8;
current = 0;
SIMD_Compress( state, state->buffer, 0 );
}
}
}
return 0;
}
int final_sd( hashState_sd *state, BitSequence *hashval )
{
#ifdef HAS_64
uint64_t l;
int current = state->count & (state->blocksize - 1);
#else
uint32_t l;
int current = state->count_low & (state->blocksize - 1);
#endif
unsigned int i;
BitSequence bs[64];
int isshort = 1;
// If there is still some data in the buffer, hash it
if ( current )
{
// We first need to zero out the end of the buffer.
if ( current & 7 )
{
BitSequence mask = 0xff >> ( current & 7 );
state->buffer[current/8] &= ~mask;
}
current = ( current+7 ) / 8;
memset( state->buffer+current, 0, state->blocksize/8 - current );
SIMD_Compress( state, state->buffer, 0 );
}
//* Input the message length as the last block
memset( state->buffer, 0, state->blocksize / 8 );
#ifdef HAS_64
l = state->count;
for ( i=0; i<8; i++ )
{
state->buffer[i] = l & 0xff;
l >>= 8;
}
if ( state->count < 16384 )
isshort = 2;
#else
l = state->count_low;
for ( i=0; i<4; i++ )
{
state->buffer[i] = l & 0xff;
l >>= 8;
}
l = state->count_high;
for ( i=0; i<4; i++ )
{
state->buffer[4+i] = l & 0xff;
l >>= 8;
}
if ( state->count_high == 0 && state->count_low < 16384 )
isshort = 2;
#endif
SIMD_Compress( state, state->buffer, isshort );
// Decode the 32-bit words into a BitSequence
for ( i=0; i < 2*state->n_feistels; i++ )
{
u32 x = state->A[i];
bs[4*i ] = x&0xff;
x >>= 8;
bs[4*i+1] = x&0xff;
x >>= 8;
bs[4*i+2] = x&0xff;
x >>= 8;
bs[4*i+3] = x&0xff;
}
memcpy( hashval, bs, state->hashbitlen / 8 );
if ( state->hashbitlen % 8 )
{
BitSequence mask = 0xff << ( 8 - (state->hashbitlen % 8) );
hashval[state->hashbitlen/8 + 1] = bs[state->hashbitlen/8 + 1] & mask;
}
return 0;
}
int update_final_sd( hashState_sd *state, BitSequence *hashval,
const BitSequence *data, DataLength databitlen )
{
int current, i;
unsigned int bs = state->blocksize;
static int align = -1;
BitSequence out[64];
int isshort = 1;
uint64_t l;
if (align == -1)
align = RequiredAlignment();
#ifdef HAS_64
current = state->count & (bs - 1);
#else
current = state->count_low & (bs - 1);
#endif
if ( current & 7 )
{
// The number of hashed bits is not a multiple of 8.
// Very painfull to implement and not required by the NIST API.
return 1;
}
while ( databitlen > 0 )
{
if ( IS_ALIGNED(data,align) && current == 0 && databitlen >= bs )
{
// We can hash the data directly from the input buffer.
SIMD_Compress(state, data, 0);
databitlen -= bs;
data += bs/8;
IncreaseCounter(state, bs);
}
else
{
// Copy a chunk of data to the buffer
unsigned int len = bs - current;
if ( databitlen < len )
{
memcpy( state->buffer+current/8, data, (databitlen+7)/8 );
IncreaseCounter( state, databitlen );
break;
}
else
{
memcpy( state->buffer+current/8, data, len/8 );
IncreaseCounter( state,len );
databitlen -= len;
data += len/8;
current = 0;
SIMD_Compress( state, state->buffer, 0 );
}
}
}
current = state->count & (state->blocksize - 1);
// If there is still some data in the buffer, hash it
if ( current )
{
// We first need to zero out the end of the buffer.
if ( current & 7 )
{
BitSequence mask = 0xff >> ( current & 7 );
state->buffer[current/8] &= ~mask;
}
current = ( current+7 ) / 8;
memset( state->buffer+current, 0, state->blocksize/8 - current );
SIMD_Compress( state, state->buffer, 0 );
}
//* Input the message length as the last block
memset( state->buffer, 0, state->blocksize / 8 );
l = state->count;
for ( i=0; i<8; i++ )
{
state->buffer[i] = l & 0xff;
l >>= 8;
}
if ( state->count < 16384 )
isshort = 2;
SIMD_Compress( state, state->buffer, isshort );
// Decode the 32-bit words into a BitSequence
for ( i=0; i < 2*state->n_feistels; i++ )
{
u32 x = state->A[i];
out[4*i ] = x & 0xff;
x >>= 8;
out[4*i+1] = x & 0xff;
x >>= 8;
out[4*i+2] = x & 0xff;
x >>= 8;
out[4*i+3] = x & 0xff;
}
memcpy( hashval, out, state->hashbitlen / 8 );
if ( state->hashbitlen % 8 )
{
BitSequence mask = 0xff << ( 8 - (state->hashbitlen % 8) );
hashval[state->hashbitlen/8 + 1] = out[state->hashbitlen/8 + 1] & mask;
}
return 0;
}
int simd_full( hashState_sd *state, BitSequence *hashval,
const BitSequence *data, DataLength databitlen )
{
InitIV( state, 512, IV_512 );
int current, i;
unsigned int bs = state->blocksize;
static int align = -1;
BitSequence out[64];
int isshort = 1;
uint64_t l;
if (align == -1)
align = RequiredAlignment();
#ifdef HAS_64
current = state->count & (bs - 1);
#else
current = state->count_low & (bs - 1);
#endif
if ( current & 7 )
{
// The number of hashed bits is not a multiple of 8.
// Very painfull to implement and not required by the NIST API.
return 1;
}
while ( databitlen > 0 )
{
if ( IS_ALIGNED(data,align) && current == 0 && databitlen >= bs )
{
// We can hash the data directly from the input buffer.
SIMD_Compress(state, data, 0);
databitlen -= bs;
data += bs/8;
IncreaseCounter(state, bs);
}
else
{
// Copy a chunk of data to the buffer
unsigned int len = bs - current;
if ( databitlen < len )
{
memcpy( state->buffer+current/8, data, (databitlen+7)/8 );
IncreaseCounter( state, databitlen );
break;
}
else
{
memcpy( state->buffer+current/8, data, len/8 );
IncreaseCounter( state,len );
databitlen -= len;
data += len/8;
current = 0;
SIMD_Compress( state, state->buffer, 0 );
}
}
}
current = state->count & (state->blocksize - 1);
// If there is still some data in the buffer, hash it
if ( current )
{
// We first need to zero out the end of the buffer.
if ( current & 7 )
{
BitSequence mask = 0xff >> ( current & 7 );
state->buffer[current/8] &= ~mask;
}
current = ( current+7 ) / 8;
memset( state->buffer+current, 0, state->blocksize/8 - current );
SIMD_Compress( state, state->buffer, 0 );
}
//* Input the message length as the last block
memset( state->buffer, 0, state->blocksize / 8 );
l = state->count;
for ( i=0; i<8; i++ )
{
state->buffer[i] = l & 0xff;
l >>= 8;
}
if ( state->count < 16384 )
isshort = 2;
SIMD_Compress( state, state->buffer, isshort );
// Decode the 32-bit words into a BitSequence
for ( i=0; i < 2*state->n_feistels; i++ )
{
u32 x = state->A[i];
out[4*i ] = x & 0xff;
x >>= 8;
out[4*i+1] = x & 0xff;
x >>= 8;
out[4*i+2] = x & 0xff;
x >>= 8;
out[4*i+3] = x & 0xff;
}
memcpy( hashval, out, state->hashbitlen / 8 );
if ( state->hashbitlen % 8 )
{
BitSequence mask = 0xff << ( 8 - (state->hashbitlen % 8) );
hashval[state->hashbitlen/8 + 1] = out[state->hashbitlen/8 + 1] & mask;
}
return 0;
}
#endif

View File

@@ -1,64 +0,0 @@
#ifndef __NIST_H__
#define __NIST_H__
/*define data alignment for different C compilers*/
#if defined(__GNUC__)
#define DATA_ALIGN(x) x __attribute__((aligned(16)))
#else
#define DATA_ALIGN(x) __declspec(align(16)) x
#endif
#include "simd-compat.h"
#include "compat/sha3-defs.h"
/*
* NIST API Specific types.
*/
typedef struct {
unsigned int hashbitlen;
unsigned int blocksize;
unsigned int n_feistels;
#ifdef HAS_64
uint64_t count;
#else
uint32_t count_low;
uint32_t count_high;
#endif
DATA_ALIGN(uint32_t A[32]);
uint32_t *B;
uint32_t *C;
uint32_t *D;
DATA_ALIGN(unsigned char buffer[128]);
} hashState_sd;
/*
* NIST API
*/
int init_sd(hashState_sd *state, int hashbitlen);
int update_sd(hashState_sd *state, const BitSequence *data, DataLength databitlen);
int final_sd(hashState_sd *state, BitSequence *hashval);
int update_final_sd( hashState_sd *state, BitSequence *hashval,
const BitSequence *data, DataLength databitlen );
int simd_full( hashState_sd *state, BitSequence *hashval,
const BitSequence *data, DataLength databitlen );
/*
* Internal API
*/
//int SupportedLength(int hashbitlen);
int RequiredAlignment(void);
void SIMD_Compress(hashState_sd * state, const unsigned char *M, int final);
void fft128_natural(fft_t *a, unsigned char *x);
void fft256_natural(fft_t *a, unsigned char *x);
#endif

View File

@@ -1,198 +0,0 @@
#ifndef __SIMD_COMPAT_H__
#define __SIMD_COMPAT_H__
#include <limits.h>
/*
* This file desfines some helper function for cross-platform compatibility.
*/
#if defined __GNUC_PREREQ && (! defined __STRICT_ANSI__)
#define GNU_EXT
#endif
/*
* First define some integer types.
*/
#if defined __STDC__ && __STDC_VERSION__ >= 199901L
/*
* On C99 implementations, we can use <stdint.h> to get an exact 32-bit
* type, if any, or otherwise use a wider type.
*/
#include <stdint.h>
#include "compat/brg_types.h"
#define C32(x) ((u32)(x))
#define HAS_64 1
#else
/*
* On non-C99 systems, we use "unsigned int" if it is wide enough,
* "unsigned long" otherwise. This supports all "reasonable" architectures.
* We have to be cautious: pre-C99 preprocessors handle constants
* differently in '#if' expressions. Hence the shifts to test UINT_MAX.
*/
#if ((UINT_MAX >> 11) >> 11) >= 0x3FF
typedef unsigned int u32;
#define C32(x) ((u32)(x ## U))
#else
typedef unsigned long u32;
#define C32(x) ((u32)(x ## UL))
#endif
/*
* We want a 64-bit type. We use "unsigned long" if it is wide enough (as
* is common on 64-bit architectures such as AMD64, Alpha or Sparcv9),
* "unsigned long long" otherwise, if available. We use ULLONG_MAX to
* test whether "unsigned long long" is available; we also know that
* gcc features this type, even if the libc header do not know it.
*/
#if ((ULONG_MAX >> 31) >> 31) >= 3
typedef unsigned long u64;
#define HAS_64 1
#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__
typedef unsigned long long u64;
#define HAS_64 1
#else
/*
* No 64-bit type...
*/
#endif
#endif
/*
* fft_t should be at least 16 bits wide.
* using short int will require less memory, but int is faster...
*/
typedef int fft_t;
/*
* Implementation note: some processors have specific opcodes to perform
* a rotation. Recent versions of gcc recognize the expression above and
* use the relevant opcodes, when appropriate.
*/
#define T32(x) ((x) & C32(0xFFFFFFFF))
#define ROTL32(x, n) T32(((x) << (n)) | ((x) >> (32 - (n))))
#define ROTR32(x, n) ROTL32(x, (32 - (n)))
/*
* The macro MAYBE_INLINE expands to an inline qualifier, is available.
*/
#if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined GNU_EXT
#define MAYBE_INLINE static inline
#elif defined _MSC_VER
#define MAYBE_INLINE __inline
#else
#define MAYBE_INLINE
#endif
/* */
#if defined __GNUC__ && ( defined __i386__ || defined __x86_64__ )
#define rdtsc() \
({ \
u32 lo, hi; \
__asm__ __volatile__ ( /* serialize */ \
"xorl %%eax,%%eax \n cpuid" \
::: "%rax", "%rbx", "%rcx", "%rdx"); \
/* We cannot use "=A", since this would use %rax on x86_64 */ \
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); \
(u64)hi << 32 | lo; \
}) \
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
#define rdtsc __rdtsc
#endif
/*
* The IS_ALIGNED macro tests if a char* pointer is aligned to an
* n-bit boundary.
* It is defined as false on unknown architectures.
*/
#define CHECK_ALIGNED(p,n) ((((unsigned char *) (p) - (unsigned char *) NULL) & ((n)-1)) == 0)
#if defined __i386__ || defined __x86_64 || defined _M_IX86 || defined _M_X64
/*
* Unaligned 32-bit access are not expensive on x86 so we don't care
*/
#define IS_ALIGNED(p,n) (n<=4 || CHECK_ALIGNED(p,n))
#elif defined __sparcv9 || defined __sparc || defined __arm || \
defined __ia64 || defined __ia64__ || \
defined __itanium__ || defined __M_IA64 || \
defined __powerpc__ || defined __powerpc
#define IS_ALIGNED(p,n) CHECK_ALIGNED(p,n)
#else
/*
* Unkonwn architecture: play safe
*/
#define IS_ALIGNED(p,n) 0
#endif
/* checks for endianness */
#if defined (__linux__) || defined (__GLIBC__)
# include <endian.h>
#elif defined (__FreeBSD__)
# include <machine/endian.h>
#elif defined (__OpenBSD__)
# include <sys/endian.h>
#endif
#ifdef __BYTE_ORDER
# if __BYTE_ORDER == __LITTLE_ENDIAN
# define SIMD_LITTLE_ENDIAN
# elif __BYTE_ORDER == __BIG_ENDIAN
# define SIMD_BIG_ENDIAN
# endif
#else
# if defined __i386__ || defined __x86_64 || defined _M_IX86 || defined _M_X64
# define SIMD_LITTLE_ENDIAN
# endif
#endif
#endif

View File

@@ -1,7 +1,6 @@
#ifndef SIMD_HASH_2WAY_H__
#define SIMD_HASH_2WAY_H__ 1
#include "simd-compat.h"
#include "simd-utils.h"
#if defined(__SSE2__) || defined (__ARM_NEON)
@@ -34,7 +33,7 @@ typedef struct
unsigned int hashbitlen;
unsigned int blocksize;
unsigned int n_feistels;
} simd512_2way_context __attribute__((aligned(128)));
} simd512_2way_context __attribute__((aligned(64)));
#define simd_2way_context simd512_2way_context
// databitlen is bits

View File

@@ -1,948 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include "nist.h"
#include "vector.h"
//#if defined(__SSE2__) || defined(__ARM_NEON)
#if defined(__SSE2__)
#define PRINT_SOME 0
/*
int SupportedLength(int hashbitlen) {
if (hashbitlen <= 0 || hashbitlen > 512)
return 0;
else
return 1;
}
*/
int RequiredAlignment(void) {
return 16;
}
static const union cv V128 = CV(128);
static const union cv V255 = CV(255);
static const union cv V257 = CV(257);
static const union cv8 V0 = CV(0);
/*
* Reduce modulo 257; result is in [-127; 383]
* REDUCE(x) := (x&255) - (x>>8)
*/
#define REDUCE(x) \
v16_sub(v16_and(x, V255.v16), v16_shift_r (x, 8))
/*
* Reduce from [-127; 383] to [-128; 128]
* EXTRA_REDUCE_S(x) := x<=128 ? x : x-257
*/
#define EXTRA_REDUCE_S(x) \
v16_sub(x, v16_and(V257.v16, v16_cmp(x, V128.v16)))
/*
* Reduce modulo 257; result is in [-128; 128]
*/
#define REDUCE_FULL_S(x) \
EXTRA_REDUCE_S(REDUCE(x))
#define DO_REDUCE(i) \
X(i) = REDUCE(X(i))
#define DO_REDUCE_FULL_S(i) \
do { \
X(i) = REDUCE(X(i)); \
X(i) = EXTRA_REDUCE_S(X(i)); \
} while(0)
#define MAYBE_VOLATILE
MAYBE_INLINE void fft64(void *a) {
v16* const A = a;
register v16 X0, X1, X2, X3, X4, X5, X6, X7;
/*
#if V16_SIZE == 8
#define X(i) A[i]
#elif V16_SIZE == 4
#define X(i) A[2*i]
#endif
*/
#define X(i) X##i
X0 = A[0];
X1 = A[1];
X2 = A[2];
X3 = A[3];
X4 = A[4];
X5 = A[5];
X6 = A[6];
X7 = A[7];
#define DO_REDUCE(i) \
X(i) = REDUCE(X(i))
/*
* Begin with 8 parallels DIF FFT_8
*
* FFT_8 using w=4 as 8th root of unity
* Unrolled decimation in frequency (DIF) radix-2 NTT.
* Output data is in revbin_permuted order.
*/
static const int w[] = {0, 2, 4, 6};
// v16 *Twiddle = (v16*)FFT64_Twiddle;
#define BUTTERFLY(i,j,n) \
do { \
MAYBE_VOLATILE v16 v = X(j); \
X(j) = v16_add(X(i), X(j)); \
if (n) \
X(i) = v16_shift_l(v16_sub(X(i), v), w[n]); \
else \
X(i) = v16_sub(X(i), v); \
} while(0)
BUTTERFLY(0, 4, 0);
BUTTERFLY(1, 5, 1);
BUTTERFLY(2, 6, 2);
BUTTERFLY(3, 7, 3);
DO_REDUCE(2);
DO_REDUCE(3);
BUTTERFLY(0, 2, 0);
BUTTERFLY(4, 6, 0);
BUTTERFLY(1, 3, 2);
BUTTERFLY(5, 7, 2);
DO_REDUCE(1);
BUTTERFLY(0, 1, 0);
BUTTERFLY(2, 3, 0);
BUTTERFLY(4, 5, 0);
BUTTERFLY(6, 7, 0);
/* We don't need to reduce X(7) */
DO_REDUCE_FULL_S(0);
DO_REDUCE_FULL_S(1);
DO_REDUCE_FULL_S(2);
DO_REDUCE_FULL_S(3);
DO_REDUCE_FULL_S(4);
DO_REDUCE_FULL_S(5);
DO_REDUCE_FULL_S(6);
#undef BUTTERFLY
/*
* Multiply by twiddle factors
*/
X(6) = v16_mul(X(6), FFT64_Twiddle[0].v16);
X(5) = v16_mul(X(5), FFT64_Twiddle[1].v16);
X(4) = v16_mul(X(4), FFT64_Twiddle[2].v16);
X(3) = v16_mul(X(3), FFT64_Twiddle[3].v16);
X(2) = v16_mul(X(2), FFT64_Twiddle[4].v16);
X(1) = v16_mul(X(1), FFT64_Twiddle[5].v16);
X(0) = v16_mul(X(0), FFT64_Twiddle[6].v16);
/*
* Transpose the FFT state with a revbin order permutation
* on the rows and the column.
* This will make the full FFT_64 in order.
*/
#define INTERLEAVE(i,j) \
do { \
v16 t1= X(i); \
v16 t2= X(j); \
X(i) = v16_interleavel(t1, t2); \
X(j) = v16_interleaveh(t1, t2); \
} while(0)
INTERLEAVE(1, 0);
INTERLEAVE(3, 2);
INTERLEAVE(5, 4);
INTERLEAVE(7, 6);
INTERLEAVE(2, 0);
INTERLEAVE(3, 1);
INTERLEAVE(6, 4);
INTERLEAVE(7, 5);
INTERLEAVE(4, 0);
INTERLEAVE(5, 1);
INTERLEAVE(6, 2);
INTERLEAVE(7, 3);
#undef INTERLEAVE
/*
* Finish with 8 parallels DIT FFT_8
*
* FFT_8 using w=4 as 8th root of unity
* Unrolled decimation in time (DIT) radix-2 NTT.
* Intput data is in revbin_permuted order.
*/
#define BUTTERFLY(i,j,n) \
do { \
MAYBE_VOLATILE v16 u = X(j); \
if (n) \
X(i) = v16_shift_l(X(i), w[n]); \
X(j) = v16_sub(X(j), X(i)); \
X(i) = v16_add(u, X(i)); \
} while(0)
DO_REDUCE(0);
DO_REDUCE(1);
DO_REDUCE(2);
DO_REDUCE(3);
DO_REDUCE(4);
DO_REDUCE(5);
DO_REDUCE(6);
DO_REDUCE(7);
BUTTERFLY(0, 1, 0);
BUTTERFLY(2, 3, 0);
BUTTERFLY(4, 5, 0);
BUTTERFLY(6, 7, 0);
BUTTERFLY(0, 2, 0);
BUTTERFLY(4, 6, 0);
BUTTERFLY(1, 3, 2);
BUTTERFLY(5, 7, 2);
DO_REDUCE(3);
BUTTERFLY(0, 4, 0);
BUTTERFLY(1, 5, 1);
BUTTERFLY(2, 6, 2);
BUTTERFLY(3, 7, 3);
DO_REDUCE_FULL_S(0);
DO_REDUCE_FULL_S(1);
DO_REDUCE_FULL_S(2);
DO_REDUCE_FULL_S(3);
DO_REDUCE_FULL_S(4);
DO_REDUCE_FULL_S(5);
DO_REDUCE_FULL_S(6);
DO_REDUCE_FULL_S(7);
#undef BUTTERFLY
A[0] = X0;
A[1] = X1;
A[2] = X2;
A[3] = X3;
A[4] = X4;
A[5] = X5;
A[6] = X6;
A[7] = X7;
#undef X
}
MAYBE_INLINE void fft128(void *a) {
int i;
// Temp space to help for interleaving in the end
v16 B[8];
v16 *A = (v16*) a;
// v16 *Twiddle = (v16*)FFT128_Twiddle;
/* Size-2 butterflies */
for (i = 0; i<8; i++) {
B[i] = v16_add(A[i], A[i+8]);
B[i] = REDUCE_FULL_S(B[i]);
A[i+8] = v16_sub(A[i], A[i+8]);
A[i+8] = REDUCE_FULL_S(A[i+8]);
A[i+8] = v16_mul(A[i+8], FFT128_Twiddle[i].v16);
A[i+8] = REDUCE_FULL_S(A[i+8]);
}
fft64(B);
fft64(A+8);
/* Transpose (i.e. interleave) */
for (i=0; i<8; i++) {
A[2*i] = v16_interleavel (B[i], A[i+8]);
A[2*i+1] = v16_interleaveh (B[i], A[i+8]);
}
}
#ifdef v16_broadcast
/* Compute the FFT using a table
* The function works if the value of the message is smaller
* than 2^14.
*/
void fft128_msg_final(short *a, const unsigned char *x) {
static const union cv FFT128_Final_Table[] = {
{{ 1, -211, 60, -67, 2, 92, -137, 123}},
{{ 2, 118, 45, 111, 97, -46, 49, -106}},
{{ 4, -73, -17, -11, 8, 111, -34, -22}},
{{ -68, -4, 76, -25, 96, -96, -68, -9}},
{{ 16, -35, -68, -44, 32, -70, -136, -88}},
{{ 0, -124, 17, 12, -6, 57, 47, -8}},
{{ 64, 117, -15, 81, 128, -23, -30, -95}},
{{ -68, -53, -52, -70, -10, -117, 77, 21}},
{{ -1, -46, -60, 67, -2, -92, -120, -123}},
{{ -2, -118, -45, -111, -97, 46, -49, 106}},
{{ -4, 73, 17, 11, -8, -111, 34, 22}},
{{ 68, 4, -76, 25, -96, 96, 68, 9}},
{{ -16, -222, 68, 44, -32, 70, -121, 88}},
{{ 0, 124, -17, -12, 6, -57, -47, 8}},
{{ -64, -117, 15, -81, -128, -234, 30, 95}},
{{ 68, 53, 52, 70, 10, 117, -77, -21}},
{{-118, -31, 116, -61, 21, -62, -25, -122}},
{{-101, 107, -45, -95, -8, 3, 101, -34}},
{{ 42, -124, -50, 13, 84, 9, -100, -231}},
{{ -79, -53, 82, 65, -81, 47, 61, 107}},
{{ -89, -239, 57, -205, -178, 36, -143, 104}},
{{-126, 113, 33, 111, 103, -109, 65, -114}},
{{ -99, 72, -29, -49, -198, -113, -58, -98}},
{{ 8, -27, -106, -30, 111, 6, 10, -108}},
{{-139, 31, -116, -196, -21, 62, 25, -135}},
{{ 101, -107, 45, 95, 8, -3, -101, 34}},
{{ -42, -133, 50, -13, -84, -9, 100, -26}},
{{ 79, 53, -82, -65, 81, -47, -61, -107}},
{{-168, -18, -57, -52, -79, -36, -114, -104}},
{{ 126, -113, -33, -111, -103, 109, -65, 114}},
{{ 99, -72, -228, 49, -59, 113, 58, -159}},
{{ -8, 27, 106, 30, -111, -6, -10, 108}}
};
// v16 *Table = (v16*)FFT128_Final_Table;
v16 *A = (v16*) a;
v16 msg1 = v16_broadcast(x[0]>128?x[0]-257:x[0]);
v16 msg2 = v16_broadcast(x[1]>128?x[1]-257:x[1]);
// v16 msg2 = v16_broadcast(x[1]);
#if 0
int i;
for (i=0; i<16; i++) {
v16 tmp = v16_mul(FFT128_Final_Table[2*i].v16 , msg2);
v16 sum = v16_add(FFT128_Final_Table[2*i+1].v16, msg1);
sum = v16_add(sum, tmp);
A[i] = REDUCE_FULL_S(sum);
}
#else
#define FFT_FINAL(i) \
v16 tmp##i = v16_mul(FFT128_Final_Table[2*i].v16, msg2); \
v16 sum##i = v16_add(FFT128_Final_Table[2*i+1].v16, msg1); \
sum##i = v16_add(sum##i, tmp##i); \
A[i] = REDUCE_FULL_S(sum##i);
FFT_FINAL(0)
FFT_FINAL(1)
FFT_FINAL(2)
FFT_FINAL(3)
FFT_FINAL(4)
FFT_FINAL(5)
FFT_FINAL(6)
FFT_FINAL(7)
FFT_FINAL(8)
FFT_FINAL(9)
FFT_FINAL(10)
FFT_FINAL(11)
FFT_FINAL(12)
FFT_FINAL(13)
FFT_FINAL(14)
FFT_FINAL(15)
#endif
}
#endif
void fft128_msg(short *a, const unsigned char *x, int final) {
static const union cv Tweak =
{{0,0,0,0,0,0,0,1}};
static const union cv FinalTweak =
{{0,0,0,0,0,1,0,1}};
v8 *X = (v8*) x;
v16 *A = (v16*) a;
// v16 *Twiddle = (v16*)FFT128_Twiddle;
#define UNPACK(i) \
do { \
v8 t = X[i]; \
A[2*i] = v8_mergel(t, V0.v8); \
A[2*i+8] = v16_mul(A[2*i], FFT128_Twiddle[2*i].v16); \
A[2*i+8] = REDUCE(A[2*i+8]); \
A[2*i+1] = v8_mergeh(t, V0.v8); \
A[2*i+9] = v16_mul(A[2*i+1], FFT128_Twiddle[2*i+1].v16); \
A[2*i+9] = REDUCE(A[2*i+9]); \
} while(0)
/*
* This allows to tweak the last butterflies to introduce X^127
*/
#define UNPACK_TWEAK(i,tw) \
do { \
v8 t = X[i]; \
v16 tmp; \
A[2*i] = v8_mergel(t, V0.v8); \
A[2*i+8] = v16_mul(A[2*i], FFT128_Twiddle[2*i].v16); \
A[2*i+8] = REDUCE(A[2*i+8]); \
tmp = v8_mergeh(t, V0.v8); \
A[2*i+1] = v16_add(tmp, tw); \
A[2*i+9] = v16_mul(v16_sub(tmp, tw), FFT128_Twiddle[2*i+1].v16); \
A[2*i+9] = REDUCE(A[2*i+9]); \
} while(0)
UNPACK(0);
UNPACK(1);
UNPACK(2);
if (final)
UNPACK_TWEAK(3, FinalTweak.v16);
else
UNPACK_TWEAK(3, Tweak.v16);
#undef UNPACK
#undef UNPACK_TWEAK
fft64(a);
fft64(a+64);
}
#if 0
void fft128_msg(short *a, const unsigned char *x, int final) {
for (int i=0; i<64; i++)
a[i] = x[i];
for (int i=64; i<128; i++)
a[i] = 0;
a[127] = 1;
a[125] = final? 1: 0;
fft128(a);
}
#endif
void fft256_msg(short *a, const unsigned char *x, int final) {
static const union cv Tweak =
{{0,0,0,0,0,0,0,1}};
static const union cv FinalTweak =
{{0,0,0,0,0,1,0,1}};
v8 *X = (v8*) x;
v16 *A = (v16*) a;
// v16 *Twiddle = (v16*)FFT256_Twiddle;
#define UNPACK(i) \
do { \
v8 t = X[i]; \
A[2*i] = v8_mergel(t, V0.v8); \
A[2*i+16] = v16_mul(A[2*i], FFT256_Twiddle[2*i].v16); \
A[2*i+16] = REDUCE(A[2*i+16]); \
A[2*i+1] = v8_mergeh(t, V0.v8); \
A[2*i+17] = v16_mul(A[2*i+1], FFT256_Twiddle[2*i+1].v16); \
A[2*i+17] = REDUCE(A[2*i+17]); \
} while(0)
/*
* This allows to tweak the last butterflies to introduce X^127
*/
#define UNPACK_TWEAK(i,tw) \
do { \
v8 t = X[i]; \
v16 tmp; \
A[2*i] = v8_mergel(t, V0.v8); \
A[2*i+16] = v16_mul(A[2*i], FFT256_Twiddle[2*i].v16); \
A[2*i+16] = REDUCE(A[2*i+16]); \
tmp = v8_mergeh(t, V0.v8); \
A[2*i+1] = v16_add(tmp, tw); \
A[2*i+17] = v16_mul(v16_sub(tmp, tw), FFT256_Twiddle[2*i+1].v16); \
A[2*i+17] = REDUCE(A[2*i+17]); \
} while(0)
UNPACK(0);
UNPACK(1);
UNPACK(2);
UNPACK(3);
UNPACK(4);
UNPACK(5);
UNPACK(6);
if (final)
UNPACK_TWEAK(7, FinalTweak.v16);
else
UNPACK_TWEAK(7, Tweak.v16);
#undef UNPACK
#undef UNPACK_TWEAK
fft128(a);
fft128(a+128);
}
void rounds(u32* state, const unsigned char* msg, short* fft) {
v32* S = (v32*) state;
const v32* M = (v32*)msg;
volatile v16* W = (v16*)fft;
register v32 S0, S1, S2, S3;
static const union cv code[] = { CV(185), CV(233) };
S0 = v32_xor(S[0], v32_bswap(M[0]));
S1 = v32_xor(S[1], v32_bswap(M[1]));
S2 = v32_xor(S[2], v32_bswap(M[2]));
S3 = v32_xor(S[3], v32_bswap(M[3]));
#define S(i) S##i
/* #define F_0(B, C, D) ((((C) ^ (D)) & (B)) ^ (D)) */
/* #define F_1(B, C, D) (((D) & (C)) | (((D) | (C)) & (B))) */
#define F_0(B, C, D) v32_xor(v32_and(v32_xor(C,D), B), D)
#define F_1(B, C, D) v32_or(v32_and(D, C), v32_and( v32_or(D,C), B))
#define F(a,b,c,fun) F_##fun (a,b,c)
/*
* We split the round function in two halfes
* so as to insert some independent computations in between
*/
#define SUM3_00 1
#define SUM3_01 2
#define SUM3_02 3
#define SUM3_10 2
#define SUM3_11 3
#define SUM3_12 1
#define SUM3_20 3
#define SUM3_21 1
#define SUM3_22 2
#define STEP_1(a,b,c,d,w,fun,r,s,z) \
do { \
if (PRINT_SOME) { \
int j; \
v32 ww=w, aa=a, bb=b, cc=c, dd=d; \
u32 *WW = (void*)&ww; \
u32 *AA = (void*)&aa; \
u32 *BB = (void*)&bb; \
u32 *CC = (void*)&cc; \
u32 *DD = (void*)&dd; \
for (j=0; j<4; j++) { \
printf ("%08x/%2i/%2i[%i]: %08x %08x %08x %08x\n", \
WW[j], r, s, SUM3_##z, \
AA[j], BB[j], CC[j], DD[j]); \
} \
} \
TT = F(a,b,c,fun); \
a = v32_rotate(a,r); \
w = v32_add(w, d); \
TT = v32_add(TT, w); \
TT = v32_rotate(TT,s); \
d = v32_shufxor(a,SUM3_##z); \
} while(0)
#define STEP_2(a,b,c,d,w,fun,r,s) \
do { \
d = v32_add(d, TT); \
} while(0)
#define STEP(a,b,c,d,w,fun,r,s,z) \
do { \
register v32 TT; \
STEP_1(a,b,c,d,w,fun,r,s,z); \
STEP_2(a,b,c,d,w,fun,r,s); \
} while(0);
#define ROUND(h0,l0,u0,h1,l1,u1,h2,l2,u2,h3,l3,u3, \
fun,r,s,t,u,z,r0) \
do { \
register v32 W0, W1, W2, W3, TT; \
W0 = v16_merge##u0(W[h0], W[l0]); \
W0 = V1632(v16_mul(V3216(W0), code[z].v16)); \
STEP_1(S(0), S(1), S(2), S(3), W0, fun, r, s, r0##0); \
W1 = v16_merge##u1(W[h1], W[l1]); \
W1 = V1632(v16_mul(V3216(W1), code[z].v16)); \
STEP_2(S(0), S(1), S(2), S(3), W0, fun, r, s); \
STEP_1(S(3), S(0), S(1), S(2), W1, fun, s, t, r0##1); \
W2 = v16_merge##u2(W[h2], W[l2]); \
W2 = V1632(v16_mul(V3216(W2), code[z].v16)); \
STEP_2(S(3), S(0), S(1), S(2), W1, fun, s, t); \
STEP_1(S(2), S(3), S(0), S(1), W2, fun, t, u, r0##2); \
W3 = v16_merge##u3(W[h3], W[l3]); \
W3 = V1632(v16_mul(V3216(W3), code[z].v16)); \
STEP_2(S(2), S(3), S(0), S(1), W2, fun, t, u); \
STEP_1(S(1), S(2), S(3), S(0), W3, fun, u, r, r0##0); \
STEP_2(S(1), S(2), S(3), S(0), W3, fun, u, r); \
} while(0)
/*
* 4 rounds with code 185
*/
ROUND( 2, 10, l, 3, 11, l, 0, 8, l, 1, 9, l, 0, 3, 23, 17, 27, 0, 0);
ROUND( 3, 11, h, 2, 10, h, 1, 9, h, 0, 8, h, 1, 3, 23, 17, 27, 0, 1);
ROUND( 7, 15, h, 5, 13, h, 6, 14, l, 4, 12, l, 0, 28, 19, 22, 7, 0, 2);
ROUND( 4, 12, h, 6, 14, h, 5, 13, l, 7, 15, l, 1, 28, 19, 22, 7, 0, 0);
/*
* 4 rounds with code 233
*/
ROUND( 0, 4, h, 1, 5, l, 3, 7, h, 2, 6, l, 0, 29, 9, 15, 5, 1, 1);
ROUND( 3, 7, l, 2, 6, h, 0, 4, l, 1, 5, h, 1, 29, 9, 15, 5, 1, 2);
ROUND( 11, 15, l, 8, 12, l, 8, 12, h, 11, 15, h, 0, 4, 13, 10, 25, 1, 0);
ROUND( 9, 13, h, 10, 14, h, 10, 14, l, 9, 13, l, 1, 4, 13, 10, 25, 1, 1);
/*
* 1 round as feed-forward
*/
STEP(S(0), S(1), S(2), S(3), S[0], 0, 4, 13, 20);
STEP(S(3), S(0), S(1), S(2), S[1], 0, 13, 10, 21);
STEP(S(2), S(3), S(0), S(1), S[2], 0, 10, 25, 22);
STEP(S(1), S(2), S(3), S(0), S[3], 0, 25, 4, 20);
S[0] = S(0); S[1] = S(1); S[2] = S(2); S[3] = S(3);
#undef ROUND
#undef STEP
#undef STEP_1
#undef STEP_2
}
void rounds512(u32* state, const unsigned char* msg, short* fft) {
v32* S = (v32*) state;
v32* M = (v32*) msg;
v16* W = (v16*) fft;
register v32 S0l, S1l, S2l, S3l;
register v32 S0h, S1h, S2h, S3h;
static const union cv code[] = { CV(185), CV(233) };
S0l = v32_xor(S[0], v32_bswap(M[0]));
S0h = v32_xor(S[1], v32_bswap(M[1]));
S1l = v32_xor(S[2], v32_bswap(M[2]));
S1h = v32_xor(S[3], v32_bswap(M[3]));
S2l = v32_xor(S[4], v32_bswap(M[4]));
S2h = v32_xor(S[5], v32_bswap(M[5]));
S3l = v32_xor(S[6], v32_bswap(M[6]));
S3h = v32_xor(S[7], v32_bswap(M[7]));
#define S(i) S##i
/* #define F_0(B, C, D) ((((C) ^ (D)) & (B)) ^ (D)) */
/* #define F_1(B, C, D) (((D) & (C)) | (((D) | (C)) & (B))) */
#define F_0(B, C, D) v32_xor(v32_and(v32_xor(C,D), B), D)
#define F_1(B, C, D) v32_or(v32_and(D, C), v32_and( v32_or(D,C), B))
#define Fl(a,b,c,fun) F_##fun (a##l,b##l,c##l)
#define Fh(a,b,c,fun) F_##fun (a##h,b##h,c##h)
/*
* We split the round function in two halfes
* so as to insert some independent computations in between
*/
#define SUM7_00 0
#define SUM7_01 1
#define SUM7_02 2
#define SUM7_03 3
#define SUM7_04 4
#define SUM7_05 5
#define SUM7_06 6
#define SUM7_10 1
#define SUM7_11 2
#define SUM7_12 3
#define SUM7_13 4
#define SUM7_14 5
#define SUM7_15 6
#define SUM7_16 0
#define SUM7_20 2
#define SUM7_21 3
#define SUM7_22 4
#define SUM7_23 5
#define SUM7_24 6
#define SUM7_25 0
#define SUM7_26 1
#define SUM7_30 3
#define SUM7_31 4
#define SUM7_32 5
#define SUM7_33 6
#define SUM7_34 0
#define SUM7_35 1
#define SUM7_36 2
#define SUM7_40 4
#define SUM7_41 5
#define SUM7_42 6
#define SUM7_43 0
#define SUM7_44 1
#define SUM7_45 2
#define SUM7_46 3
#define SUM7_50 5
#define SUM7_51 6
#define SUM7_52 0
#define SUM7_53 1
#define SUM7_54 2
#define SUM7_55 3
#define SUM7_56 4
#define SUM7_60 6
#define SUM7_61 0
#define SUM7_62 1
#define SUM7_63 2
#define SUM7_64 3
#define SUM7_65 4
#define SUM7_66 5
#define PERM(z,d,a) XCAT(PERM_,XCAT(SUM7_##z,PERM_START))(d,a)
#define PERM_0(d,a) /* XOR 1 */ \
do { \
d##l = v32_shufxor(a##l,1); \
d##h = v32_shufxor(a##h,1); \
} while(0)
#define PERM_1(d,a) /* XOR 6 */ \
do { \
d##l = v32_shufxor(a##h,2); \
d##h = v32_shufxor(a##l,2); \
} while(0)
#define PERM_2(d,a) /* XOR 2 */ \
do { \
d##l = v32_shufxor(a##l,2); \
d##h = v32_shufxor(a##h,2); \
} while(0)
#define PERM_3(d,a) /* XOR 3 */ \
do { \
d##l = v32_shufxor(a##l,3); \
d##h = v32_shufxor(a##h,3); \
} while(0)
#define PERM_4(d,a) /* XOR 5 */ \
do { \
d##l = v32_shufxor(a##h,1); \
d##h = v32_shufxor(a##l,1); \
} while(0)
#define PERM_5(d,a) /* XOR 7 */ \
do { \
d##l = v32_shufxor(a##h,3); \
d##h = v32_shufxor(a##l,3); \
} while(0)
#define PERM_6(d,a) /* XOR 4 */ \
do { \
d##l = a##h; \
d##h = a##l; \
} while(0)
#define STEP_1_(a,b,c,d,w,fun,r,s,z) \
do { \
if (PRINT_SOME) { \
int j; \
v32 ww=w##l, aa=a##l, bb=b##l, cc=c##l, dd=d##l; \
u32 *WW = (void*)&ww; \
u32 *AA = (void*)&aa; \
u32 *BB = (void*)&bb; \
u32 *CC = (void*)&cc; \
u32 *DD = (void*)&dd; \
for (j=0; j<4; j++) { \
printf ("%08x/%2i/%2i: %08x %08x %08x %08x\n", \
WW[j], r, s, \
AA[j], BB[j], CC[j], DD[j]); \
} \
} \
TTl = Fl(a,b,c,fun); \
TTh = Fh(a,b,c,fun); \
a##l = v32_rotate(a##l,r); \
a##h = v32_rotate(a##h,r); \
w##l = v32_add(w##l, d##l); \
w##h = v32_add(w##h, d##h); \
TTl = v32_add(TTl, w##l); \
TTh = v32_add(TTh, w##h); \
TTl = v32_rotate(TTl,s); \
TTh = v32_rotate(TTh,s); \
PERM(z,d,a); \
} while(0)
#define STEP_1(a,b,c,d,w,fun,r,s,z) \
STEP_1_(a,b,c,d,w,fun,r,s,z)
#define STEP_2_(a,b,c,d,w,fun,r,s) \
do { \
d##l = v32_add(d##l, TTl); \
d##h = v32_add(d##h, TTh); \
} while(0)
#define STEP_2(a,b,c,d,w,fun,r,s) \
STEP_2_(a,b,c,d,w,fun,r,s)
#define STEP(a,b,c,d,w1,w2,fun,r,s,z) \
do { \
register v32 TTl, TTh, Wl=w1, Wh=w2; \
STEP_1(a,b,c,d,W,fun,r,s,z); \
STEP_2(a,b,c,d,W,fun,r,s); \
} while(0);
#define MSG_l(x) (2*(x))
#define MSG_h(x) (2*(x)+1)
#define MSG(w,hh,ll,u,z) \
do { \
int a = MSG_##u(hh); \
int b = MSG_##u(ll); \
w##l = v16_mergel(W[a], W[b]); \
w##l = V1632(v16_mul(V3216(w##l), code[z].v16)); \
w##h = v16_mergeh(W[a], W[b]); \
w##h = V1632(v16_mul(V3216(w##h), code[z].v16)); \
} while(0)
#define ROUND(h0,l0,u0,h1,l1,u1,h2,l2,u2,h3,l3,u3, \
fun,r,s,t,u,z) \
do { \
register v32 W0l, W1l, W2l, W3l, TTl; \
register v32 W0h, W1h, W2h, W3h, TTh; \
MSG(W0,h0,l0,u0,z); \
STEP_1(S(0), S(1), S(2), S(3), W0, fun, r, s, 0); \
MSG(W1,h1,l1,u1,z); \
STEP_2(S(0), S(1), S(2), S(3), W0, fun, r, s); \
STEP_1(S(3), S(0), S(1), S(2), W1, fun, s, t, 1); \
MSG(W2,h2,l2,u2,z); \
STEP_2(S(3), S(0), S(1), S(2), W1, fun, s, t); \
STEP_1(S(2), S(3), S(0), S(1), W2, fun, t, u, 2); \
MSG(W3,h3,l3,u3,z); \
STEP_2(S(2), S(3), S(0), S(1), W2, fun, t, u); \
STEP_1(S(1), S(2), S(3), S(0), W3, fun, u, r, 3); \
STEP_2(S(1), S(2), S(3), S(0), W3, fun, u, r); \
} while(0)
/*
* 4 rounds with code 185
*/
#define PERM_START 0
ROUND( 2, 10, l, 3, 11, l, 0, 8, l, 1, 9, l, 0, 3, 23, 17, 27, 0);
#undef PERM_START
#define PERM_START 4
ROUND( 3, 11, h, 2, 10, h, 1, 9, h, 0, 8, h, 1, 3, 23, 17, 27, 0);
#undef PERM_START
#define PERM_START 1
ROUND( 7, 15, h, 5, 13, h, 6, 14, l, 4, 12, l, 0, 28, 19, 22, 7, 0);
#undef PERM_START
#define PERM_START 5
ROUND( 4, 12, h, 6, 14, h, 5, 13, l, 7, 15, l, 1, 28, 19, 22, 7, 0);
#undef PERM_START
/*
* 4 rounds with code 233
*/
#define PERM_START 2
ROUND( 0, 4, h, 1, 5, l, 3, 7, h, 2, 6, l, 0, 29, 9, 15, 5, 1);
#undef PERM_START
#define PERM_START 6
ROUND( 3, 7, l, 2, 6, h, 0, 4, l, 1, 5, h, 1, 29, 9, 15, 5, 1);
#undef PERM_START
#define PERM_START 3
ROUND( 11, 15, l, 8, 12, l, 8, 12, h, 11, 15, h, 0, 4, 13, 10, 25, 1);
#undef PERM_START
#define PERM_START 0
ROUND( 9, 13, h, 10, 14, h, 10, 14, l, 9, 13, l, 1, 4, 13, 10, 25, 1);
#undef PERM_START
/*
* 1 round as feed-forward
*/
#define PERM_START 4
STEP(S(0), S(1), S(2), S(3), S[0], S[1], 0, 4, 13, 0);
STEP(S(3), S(0), S(1), S(2), S[2], S[3], 0, 13, 10, 1);
STEP(S(2), S(3), S(0), S(1), S[4], S[5], 0, 10, 25, 2);
STEP(S(1), S(2), S(3), S(0), S[6], S[7], 0, 25, 4, 3);
#undef PERM_START
S[0] = S0l; S[1] = S0h; S[2] = S1l; S[3] = S1h;
S[4] = S2l; S[5] = S2h; S[6] = S3l; S[7] = S3h;
#undef ROUND
#undef STEP
#undef STEP_1
#undef STEP_2
}
void SIMD_Compress(hashState_sd * state, const unsigned char *m, int final) {
if (state->hashbitlen <= 256) {
union cv Y[16];
short* y = (short*) Y[0].u16;
#ifdef v16_broadcast
if (final == 2) {
fft128_msg_final(y, m);
rounds(state->A, m, y);
} else {
fft128_msg(y, m, final);
rounds(state->A, m, y);
}
#else
fft128_msg(y, m, final);
rounds(state->A, m, y);
#endif
} else {
union cv Y[32];
short* y = (short*) Y[0].u16;
fft256_msg(y, m, final);
rounds512(state->A, m, y);
}
}
/*
* Give the FFT output in the regular order for consitancy checks
*/
void fft128_natural(fft_t *x, unsigned char *a) {
union cv Y[16];
short* y = (short*) Y[0].u16;
int i;
fft128_msg(y, a, 0);
for(i=0; i<64; i++) {
x[2*i] = y[i];
x[2*i+1] = y[i+64];
}
}
#endif // SSE2

View File

@@ -1,246 +0,0 @@
#ifndef __VECTOR_H__
#define __VECTOR_H__
#include "compat.h"
#include "simd-utils.h"
/*******************************
* Using GCC vector extensions *
*******************************/
//typedef unsigned char v16qi __attribute__ ((vector_size (16)));
typedef char v16qi __attribute__ ((vector_size (16)));
typedef short v8hi __attribute__ ((vector_size (16)));
typedef int v4si __attribute__ ((vector_size (16)));
typedef float v4sf __attribute__ ((vector_size (16)));
typedef long long int v2di __attribute__ ((vector_size (16)));
typedef short v4hi __attribute__ ((vector_size (8)));
typedef unsigned char v8qi __attribute__ ((vector_size (8)));
typedef v16qi v8;
typedef v8hi v16;
typedef v4si v32;
#define V16_SIZE 8
union cv {
unsigned short u16[8];
v16 v16;
};
union cv8 {
unsigned char u8[16];
v8 v8;
};
union u32 {
u32 u[4];
v32 v;
};
#define V3216(x) ((v16) (x))
#define V1632(x) ((v32) (x))
#define V168(x) ( (v8) (x))
#define V816(x) ((v16) (x))
#if 0
/* These instruction are shorter than the PAND/POR/... that GCC uses */
#define vec_and(x,y) ({v16 a = (v16) x; v16 b = (v16) y; __builtin_ia32_andps ((v4sf) a, (v4sf) b);})
#define vec_or(x,y) ({v16 a = (v16) x; v16 b = (v16) y; __builtin_ia32_orps ((v4sf) a, (v4sf) b);})
#define vec_xor(x,y) ({v16 a = (v16) x; v16 b = (v16) y; __builtin_ia32_xorps ((v4sf) a, (v4sf) b);})
#define vec_andn(x,y) ({v16 a = (v16) x; v16 b = (v16) y; __builtin_ia32_andnps ((v4sf) a, (v4sf) b);})
#define v16_and(x,y) ((v16) vec_and ((x), (y)))
#define v16_or(x,y) ((v16) vec_or ((x), (y)))
#define v16_xor(x,y) ((v16) vec_xor ((x), (y)))
#define v16_andn(x,y) ((v16) vec_andn((x), (y)))
#define v32_and(x,y) ((v32) vec_and ((x), (y)))
#define v32_or(x,y) ((v32) vec_or ((x), (y)))
#define v32_xor(x,y) ((v32) vec_xor ((x), (y)))
#define v32_andn(x,y) ((v32) vec_andn((x), (y)))
#endif
#if defined(__SSE2__)
#define vec_and(x,y) ((x)&(y))
#define vec_or(x,y) ((x)|(y))
#define vec_xor(x,y) ((x)^(y))
#define v16_and vec_and
#define v16_or vec_or
#define v16_xor vec_xor
#define v32_and vec_and
#define v32_or vec_or
#define v32_xor vec_xor
#define vec_andn(x,y) __builtin_ia32_pandn128 ((v2di) x, (v2di) y)
#define v16_andn(x,y) ((v16) vec_andn(x,y))
#define v32_andn(x,y) ((v32) vec_andn(x,y))
#define v32_add(x,y) ((x)+(y))
#define v16_add(x,y) ((x)+(y))
#define v16_sub(x,y) ((x)-(y))
#define v16_mul(x,y) ((x)*(y))
#define v16_neg(x) (-(x))
#define v16_shift_l __builtin_ia32_psllwi128
#define v16_shift_r __builtin_ia32_psrawi128
#define v16_cmp __builtin_ia32_pcmpgtw128
#define v16_interleavel __builtin_ia32_punpcklwd128
#define v16_interleaveh __builtin_ia32_punpckhwd128
#define v16_mergel(a,b) V1632(__builtin_ia32_punpcklwd128(a,b))
#define v16_mergeh(a,b) V1632(__builtin_ia32_punpckhwd128(a,b))
#define v8_mergel(a,b) V816(__builtin_ia32_punpcklbw128(a,b))
#define v8_mergeh(a,b) V816(__builtin_ia32_punpckhbw128(a,b))
#define v32_shift_l __builtin_ia32_pslldi128
#define v32_shift_r __builtin_ia32_psrldi128
#define v32_rotate(x,n) \
v32_or(v32_shift_l(x,n), v32_shift_r(x,32-(n)))
#define v32_shuf __builtin_ia32_pshufd
#define SHUFXOR_1 0xb1 /* 0b10110001 */
#define SHUFXOR_2 0x4e /* 0b01001110 */
#define SHUFXOR_3 0x1b /* 0b00011011 */
#define CAT(x, y) x##y
#define XCAT(x,y) CAT(x,y)
#define v32_shufxor(x,s) v32_shuf(x,XCAT(SHUFXOR_,s))
#define v32_bswap(x) (x)
#define v16_broadcast(x) ({ \
union u32 u; \
u32 xx = x; \
u.u[0] = xx | (xx << 16); \
V3216(v32_shuf(u.v,0)); })
#define CV(x) {{x, x, x, x, x, x, x, x}}
#elif defined(__aarch64__) && defined(__ARM_NEON)
#define vec_and( x, y ) v128_and( x, y )
#define vec_or(x,y) v128_or( x, y )
#define vec_xor(x,y) v128_xor( x, y )
#define v16_and v128_and
#define v16_or v128_or
#define v16_xor v128_xor
#define v32_and v128_and
#define v32_or v128_or
#define v32_xor v128_xor
#define vec_andn( x,y ) v128_andnot( x, y )
#define v16_andn vec_andn
#define v32_andn vec_andn
#define v32_add( x, y ) v128_add32( x, y )
#define v16_add( x, y ) v128_add16( x, y )
#define v16_sub( x, y ) v128_sub16( x, y )
#define v16_mul( x, y ) v128_mul16( x, y )
#define v16_neg(x) v128_negate16( x )
#define v16_shift_l( x, c ) v128_sl16
#define v16_shift_r v128_sr16
#define v16_cmp v128_cmpgt16
#define v16_interleavel v128_unpacklo16
#define v16_interleaveh v128_unpackhi16
#define v16_mergel(a,b) V1632(__builtin_ia32_punpcklwd128(a,b))
#define v16_mergeh(a,b) V1632(__builtin_ia32_punpckhwd128(a,b))
#define v8_mergel(a,b) V816(__builtin_ia32_punpcklbw128(a,b))
#define v8_mergeh(a,b) V816(__builtin_ia32_punpckhbw128(a,b))
#define v32_shift_l v128_sl32
#define v32_shift_r v128_sr32
#define v32_rotate(x,n) v128_rol32
#define v32_shuf __builtin_ia32_pshufd
#define SHUFXOR_1 0xb1 /* 0b10110001 */
#define SHUFXOR_2 0x4e /* 0b01001110 */
#define SHUFXOR_3 0x1b /* 0b00011011 */
#define CAT(x, y) x##y
#define XCAT(x,y) CAT(x,y)
#define v32_shufxor(x,s) v32_shuf(x,XCAT(SHUFXOR_,s))
#define v32_bswap(x) (x)
#define v16_broadcast(x) ({ \
union u32 u; \
u32 xx = x; \
u.u[0] = xx | (xx << 16); \
V3216(v32_shuf(u.v,0)); })
#define CV(x) {{x, x, x, x, x, x, x, x}}
#else
#error "I don't know how to vectorize on this architecture."
#endif
/* Twiddle tables */
static const union cv FFT64_Twiddle[] = {
{{1, 2, 4, 8, 16, 32, 64, 128}},
{{1, 60, 2, 120, 4, -17, 8, -34}},
{{1, 120, 8, -68, 64, -30, -2, 17}},
{{1, 46, 60, -67, 2, 92, 120, 123}},
{{1, 92, -17, -22, 32, 117, -30, 67}},
{{1, -67, 120, -73, 8, -22, -68, -70}},
{{1, 123, -34, -70, 128, 67, 17, 35}},
};
static const union cv FFT128_Twiddle[] = {
{{ 1, -118, 46, -31, 60, 116, -67, -61}},
{{ 2, 21, 92, -62, 120, -25, 123, -122}},
{{ 4, 42, -73, -124, -17, -50, -11, 13}},
{{ 8, 84, 111, 9, -34, -100, -22, 26}},
{{ 16, -89, -35, 18, -68, 57, -44, 52}},
{{ 32, 79, -70, 36, 121, 114, -88, 104}},
{{ 64, -99, 117, 72, -15, -29, 81, -49}},
{{128, 59, -23, -113, -30, -58, -95, -98}},
};
static const union cv FFT256_Twiddle[] = {
{{ 1, 41, -118, 45, 46, 87, -31, 14}},
{{ 60, -110, 116, -127, -67, 80, -61, 69}},
{{ 2, 82, 21, 90, 92, -83, -62, 28}},
{{ 120, 37, -25, 3, 123, -97, -122, -119}},
{{ 4, -93, 42, -77, -73, 91, -124, 56}},
{{ -17, 74, -50, 6, -11, 63, 13, 19}},
{{ 8, 71, 84, 103, 111, -75, 9, 112}},
{{ -34, -109, -100, 12, -22, 126, 26, 38}},
{{ 16, -115, -89, -51, -35, 107, 18, -33}},
{{ -68, 39, 57, 24, -44, -5, 52, 76}},
{{ 32, 27, 79, -102, -70, -43, 36, -66}},
{{ 121, 78, 114, 48, -88, -10, 104, -105}},
{{ 64, 54, -99, 53, 117, -86, 72, 125}},
{{ -15, -101, -29, 96, 81, -20, -49, 47}},
{{ 128, 108, 59, 106, -23, 85, -113, -7}},
{{ -30, 55, -58, -65, -95, -40, -98, 94}}
};
#endif

View File

@@ -13,11 +13,7 @@
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#include "algo/luffa/luffa_for_sse2.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
@@ -43,11 +39,7 @@ typedef struct {
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
} c11_ctx_holder;
c11_ctx_holder c11_ctx __attribute__ ((aligned (64)));
@@ -69,11 +61,6 @@ void init_c11_ctx()
init_luffa( &c11_ctx.luffa, 512 );
cubehashInit( &c11_ctx.cube, 512, 16, 32 );
sph_shavite512_init( &c11_ctx.shavite );
#if defined(__aarch64__)
sph_simd512_init( &c11_ctx.simd );
#else
init_sd( &c11_ctx.simd, 512 );
#endif
}
void c11_hash( void *output, const void *input )
@@ -105,41 +92,35 @@ void c11_hash( void *output, const void *input )
sph_skein512( &ctx.skein, (const void*) hash, 64 );
sph_skein512_close( &ctx.skein, hash );
update_and_final_luffa( &ctx.luffa, hash, hash, 64 );
update_and_final_luffa( &ctx.luffa, hash, hash, 64 );
cubehashUpdateDigest( &ctx.cube, hash, hash, 64 );
cubehashUpdateDigest( &ctx.cube, hash, hash, 64 );
sph_shavite512( &ctx.shavite, hash, 64);
sph_shavite512_close( &ctx.shavite, hash);
sph_shavite512( &ctx.shavite, hash, 64);
sph_shavite512_close( &ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
update_final_echo ( &ctx.echo, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
update_final_echo ( &ctx.echo, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#else
sph_echo512( &ctx.echo, hash, 64 );
sph_echo512_close( &ctx.echo, hash );
sph_echo512( &ctx.echo, hash, 64 );
sph_echo512_close( &ctx.echo, hash );
#endif
memcpy(output, hash, 32);
memcpy(output, hash, 32);
}
int scanhash_c11( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t hash[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t hash[8] __attribute__((aligned(64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
const uint32_t Htarg = ptarget[7];
uint32_t nonce = first_nonce;
int thr_id = mythr->id;
volatile uint8_t *restart = &(work_restart[thr_id].restart);

View File

@@ -13,17 +13,13 @@
#include "algo/skein/sph_skein.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/shavite/sph_shavite.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#ifdef __AES__
#include "algo/groestl/aes_ni/hash-groestl.h"
#else
#include "algo/groestl/sph_groestl.h"
#endif
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/luffa/luffa_for_sse2.h"
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread int permutation[TT10_FUNC_COUNT] = { 0 };
@@ -37,11 +33,7 @@ typedef struct {
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
#ifdef __AES__
hashState_groestl groestl;
#else
@@ -62,11 +54,6 @@ void init_tt10_ctx()
init_luffa( &tt10_ctx.luffa, 512 );
cubehashInit( &tt10_ctx.cube, 512, 16, 32 );
sph_shavite512_init( &tt10_ctx.shavite );
#if defined(__aarch64__)
sph_simd512_init( &tt10_ctx.simd );
#else
init_sd( &tt10_ctx.simd, 512 );
#endif
#ifdef __AES__
init_groestl( &tt10_ctx.groestl, 64 );
#else
@@ -222,27 +209,7 @@ void timetravel10_hash(void *output, const void *input)
}
break;
case 9:
if ( i == 0 )
{
memcpy( &ctx.simd, &tt10_mid.simd, sizeof tt10_mid.simd );
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) input + midlen, tail );
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hashB,
(const BitSequence *)input + midlen, tail*8 );
#endif
}
else
{
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_sd( &ctx.simd, (const BitSequence *)hashA, dataLen*8 );
final_sd( &ctx.simd, (BitSequence *)hashB );
#endif
}
simd512_ctx( &ctx.simd, hashB, hashA, dataLen );
break;
default:
break;
@@ -325,15 +292,6 @@ int scanhash_timetravel10( struct work *work, uint32_t max_nonce,
memcpy( &tt10_mid.shavite, &tt10_ctx.shavite, sizeof(tt10_mid.shavite ) );
sph_shavite512( &tt10_mid.shavite, endiandata, 64 );
break;
case 9:
memcpy( &tt10_mid.simd, &tt10_ctx.simd, sizeof(tt10_mid.simd ) );
#if defined(__aarch64__)
sph_simd512( &tt10_mid.simd, (const void*) endiandata, 64 );
sph_simd512_close( &tt10_mid.simd, hash);
#else
update_sd( &tt10_mid.simd, (const BitSequence *)endiandata, 512 );
#endif
break;
default:
break;
}

View File

@@ -22,12 +22,7 @@
#include "algo/echo/sph_echo.h"
#endif
#include "algo/luffa/luffa_for_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
typedef struct {
sph_blake512_context blake;
@@ -45,11 +40,7 @@ typedef struct {
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
} x11_ctx_holder;
x11_ctx_holder x11_ctx;
@@ -71,11 +62,6 @@ void init_x11_ctx()
init_luffa( &x11_ctx.luffa, 512 );
cubehashInit( &x11_ctx.cube, 512, 16, 32 );
sph_shavite512_init( &x11_ctx.shavite );
#if defined(__aarch64__)
sph_simd512_init( &x11_ctx.simd );
#else
init_sd( &x11_ctx.simd, 512 );
#endif
}
void x11_hash( void *state, const void *input )
@@ -118,13 +104,7 @@ void x11_hash( void *state, const void *input )
sph_shavite512( &ctx.shavite, hash, 64 );
sph_shavite512_close( &ctx.shavite, hash );
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
update_final_echo ( &ctx.echo, (BitSequence *)hash,

View File

@@ -20,11 +20,7 @@
#include "algo/echo/sph_echo.h"
#endif
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#include "algo/luffa/luffa_for_sse2.h"
typedef struct {
@@ -37,11 +33,7 @@ typedef struct {
#endif
hashState_luffa luffa;
cubehashParam cube;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_blake512_context blake;
sph_bmw512_context bmw;
sph_skein512_context skein;
@@ -63,11 +55,6 @@ void init_x11evo_ctx()
#endif
init_luffa( &x11evo_ctx.luffa, 512 );
cubehashInit( &x11evo_ctx.cube, 512, 16, 32 );
#if defined(__aarch64__)
sph_simd512_init( &x11evo_ctx.simd );
#else
init_sd( &x11evo_ctx.simd, 512 );
#endif
sph_blake512_init( &x11evo_ctx.blake );
sph_bmw512_init( &x11evo_ctx.bmw );
sph_skein512_init( &x11evo_ctx.skein );
@@ -146,12 +133,7 @@ void x11evo_hash( void *state, const void *input )
sph_shavite512_close( &ctx.shavite, (char*)hash );
break;
case 9:
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (char*)hash, (const char*)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
break;
case 10:
#ifdef __AES__

View File

@@ -17,12 +17,7 @@
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
@@ -47,11 +42,7 @@ typedef struct {
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_gost512_context gost;
} x11gost_ctx_holder;
@@ -75,11 +66,6 @@ void init_x11gost_ctx()
sph_shavite512_init( &x11gost_ctx.shavite );
init_luffa( &x11gost_ctx.luffa, 512 );
cubehashInit( &x11gost_ctx.cube, 512, 16, 32 );
#if defined(__aarch64__)
sph_simd512_init(&x11gost_ctx.simd);
#else
init_sd( &x11gost_ctx.simd, 512 );
#endif
}
void x11gost_hash(void *output, const void *input)
@@ -123,13 +109,7 @@ void x11gost_hash(void *output, const void *input)
sph_shavite512( &ctx.shavite, hash, 64 );
sph_shavite512_close( &ctx.shavite, hash );
#if defined(__aarch64__)
sph_simd512 (&ctx.simd, hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
update_final_echo ( &ctx.echo, (BitSequence *)hash,

View File

@@ -17,11 +17,7 @@
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#if defined(__AES__)
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
@@ -44,11 +40,7 @@ typedef struct {
hashState_luffa luffa;
cubehashParam cubehash;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_hamsi512_context hamsi;
} x12_ctx_holder;
@@ -68,14 +60,9 @@ void init_x12_ctx()
sph_groestl512_init(&x12_ctx.groestl);
sph_echo512_init(&x12_ctx.echo);
#endif
init_luffa( &x12_ctx.luffa, 512 );
init_luffa( &x12_ctx.luffa, 512 );
cubehashInit( &x12_ctx.cubehash, 512, 16, 32 );
sph_shavite512_init( &x12_ctx.shavite );
#if defined(__aarch64__)
sph_simd512_init( &x12_ctx.simd );
#else
init_sd( &x12_ctx.simd, 512 );
#endif
sph_hamsi512_init( &x12_ctx.hamsi );
};
@@ -101,13 +88,7 @@ void x12hash(void *output, const void *input)
sph_shavite512( &ctx.shavite, hash, 64);
sph_shavite512_close( &ctx.shavite, hashB);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hashB, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_sd( &ctx.simd, (const BitSequence *)hash, 512 );
final_sd( &ctx.simd, (BitSequence *)hash );
#endif
simd512_ctx( &ctx.simd, hash, hashB, 64 );
#if defined(__AES__)
update_final_echo ( &ctx.echo, (BitSequence *)hashB,

View File

@@ -15,11 +15,7 @@
#include "algo/hamsi/sph_hamsi.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
@@ -48,11 +44,7 @@ typedef struct {
hashState_luffa luffa;
cubehashParam cubehash;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_hamsi512_context hamsi;
} x13_ctx_holder;
@@ -77,11 +69,6 @@ void init_x13_ctx()
init_luffa( &x13_ctx.luffa, 512 );
cubehashInit( &x13_ctx.cubehash, 512, 16, 32 );
sph_shavite512_init( &x13_ctx.shavite );
#if defined(__aarch64__)
sph_simd512_init(&x13_ctx.simd);
#else
init_sd( &x13_ctx.simd, 512 );
#endif
sph_hamsi512_init( &x13_ctx.hamsi );
};
@@ -121,13 +108,7 @@ void x13hash(void *output, const void *input)
sph_shavite512( &ctx.shavite, hash, 64);
sph_shavite512_close( &ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
update_final_echo ( &ctx.echo, (BitSequence *)hash,

View File

@@ -15,11 +15,7 @@
#include "algo/shavite/sph_shavite.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
@@ -47,11 +43,7 @@ typedef struct {
sph_skein512_context skein;
cubehashParam cube;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_hamsi512_context hamsi;
sm3_ctx_t sm3;
} x13bcd_ctx_holder;
@@ -76,11 +68,6 @@ void init_x13bcd_ctx()
sph_keccak512_init( &x13bcd_ctx.keccak );
cubehashInit( &x13bcd_ctx.cube,512,16,32 );
sph_shavite512_init( &x13bcd_ctx.shavite );
#if defined(__aarch64__)
sph_simd512_init( &x13bcd_ctx.simd );
#else
init_sd( &x13bcd_ctx.simd, 512 );
#endif
sm3_init( &x13bcd_ctx.sm3 );
sph_hamsi512_init( &x13bcd_ctx.hamsi );
};
@@ -127,13 +114,7 @@ void x13bcd_hash(void *output, const void *input)
sph_shavite512( &ctx.shavite, hash, 64);
sph_shavite512_close( &ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
update_final_echo ( &ctx.echo, (BitSequence *)hash,

View File

@@ -17,11 +17,7 @@
#include "algo/fugue/sph_fugue.h"
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
@@ -46,11 +42,7 @@ typedef struct {
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sm3_ctx_t sm3;
sph_hamsi512_context hamsi;
sph_fugue512_context fugue;
@@ -75,11 +67,6 @@ void init_x13sm3_ctx()
init_luffa( &hsr_ctx.luffa,512 );
cubehashInit( &hsr_ctx.cube,512,16,32 );
sph_shavite512_init( &hsr_ctx.shavite );
#if defined(__aarch64__)
sph_simd512_init( &hsr_ctx.simd );
#else
init_sd( &hsr_ctx.simd,512 );
#endif
sm3_init( &hsr_ctx.sm3 );
sph_hamsi512_init( &hsr_ctx.hamsi );
sph_fugue512_init( &hsr_ctx.fugue );
@@ -123,13 +110,7 @@ void x13sm3_hash(void *output, const void *input)
sph_shavite512( &ctx.shavite, hash, 64);
sph_shavite512_close( &ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
//11---echo---
#ifdef __AES__

View File

@@ -15,11 +15,7 @@
#include "algo/hamsi/sph_hamsi.h"
#include "algo/shabal/sph_shabal.h"
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
@@ -49,11 +45,7 @@ typedef struct {
hashState_luffa luffa;
cubehashParam cube;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_hamsi512_context hamsi;
sph_shabal512_context shabal;
} x14_ctx_holder;
@@ -79,11 +71,6 @@ void init_x14_ctx()
init_luffa( &x14_ctx.luffa,512 );
cubehashInit( &x14_ctx.cube,512,16,32 );
sph_shavite512_init( &x14_ctx.shavite );
#if defined(__aarch64__)
sph_simd512_init( &x14_ctx.simd );
#else
init_sd( &x14_ctx.simd, 512 );
#endif
sph_hamsi512_init( &x14_ctx.hamsi );
sph_shabal512_init( &x14_ctx.shabal );
};
@@ -124,13 +111,7 @@ void x14hash(void *output, const void *input)
sph_shavite512( &ctx.shavite, hash, 64);
sph_shavite512_close( &ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
update_final_echo ( &ctx.echo, (BitSequence *)hash,

View File

@@ -17,12 +17,7 @@
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
@@ -52,11 +47,7 @@ typedef struct {
hashState_luffa luffa;
cubehashParam cubehash;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_hamsi512_context hamsi;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
@@ -83,11 +74,6 @@ void init_x15_ctx()
init_luffa( &x15_ctx.luffa,512 );
cubehashInit( &x15_ctx.cubehash, 512, 16, 32 );
sph_shavite512_init( &x15_ctx.shavite );
#if defined(__aarch64__)
sph_simd512_init( &x15_ctx.simd );
#else
init_sd( &x15_ctx.simd, 512 );
#endif
sph_hamsi512_init( &x15_ctx.hamsi );
sph_shabal512_init( &x15_ctx.shabal );
sph_whirlpool_init( &x15_ctx.whirlpool );
@@ -131,13 +117,7 @@ void x15hash(void *output, const void *input)
sph_shavite512( &ctx.shavite, hash, 64);
sph_shavite512_close( &ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
update_final_echo ( &ctx.echo, (BitSequence *)hash,

View File

@@ -236,7 +236,7 @@ int scanhash_hex( struct work *work, uint32_t max_nonce,
do
{
edata[19] = nonce;
if ( hex_hash( hash32, edata, thr_id ) );
if ( hex_hash( hash32, edata, thr_id ) )
if ( unlikely( valid_hash( hash32, ptarget ) && !bench ) )
{
be32enc( &pdata[19], nonce );

View File

@@ -526,7 +526,7 @@ int scanhash_x16r_8way( struct work *work, uint32_t max_nonce,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if( x16r_8way_hash( hash, vdata, thr_id ) );
if ( x16r_8way_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 8; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
@@ -952,7 +952,7 @@ int scanhash_x16r_4way( struct work *work, uint32_t max_nonce,
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( x16r_4way_hash( hash, vdata, thr_id ) );
if ( x16r_4way_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 4; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
@@ -1353,7 +1353,7 @@ int scanhash_x16r_2x64( struct work *work, uint32_t max_nonce,
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do
{
if ( x16r_2x64_hash( hash, vdata, thr_id ) );
if ( x16r_2x64_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 2; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{

View File

@@ -15,7 +15,6 @@
#include "algo/luffa/luffa_for_sse2.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/simd/sph_simd.h"
#include "algo/simd/nist.h"
#include "algo/echo/sph_echo.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"

View File

@@ -137,7 +137,7 @@ int scanhash_x20r_8x64( struct work *work, uint32_t max_nonce,
n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if( x20r_8x64_hash( hash, vdata, thr_id ) );
if ( x20r_8x64_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 8; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
@@ -205,7 +205,7 @@ int scanhash_x20r_4x64( struct work *work, uint32_t max_nonce,
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
do
{
if ( x20r_4x64_hash( hash, vdata, thr_id ) );
if ( x20r_4x64_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 4; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{
@@ -269,7 +269,7 @@ int scanhash_x20r_2x64( struct work *work, uint32_t max_nonce,
*noncev = v128_intrlv_blend_32( v128_set32( n+1, 0, n, 0 ), *noncev );
do
{
if ( x20r_2x64_hash( hash, vdata, thr_id ) );
if ( x20r_2x64_hash( hash, vdata, thr_id ) )
for ( int i = 0; i < 2; i++ )
if ( unlikely( valid_hash( hash + (i<<3), ptarget ) && !bench ) )
{

View File

@@ -18,11 +18,7 @@
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/haval/sph-haval.h"
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#include "algo/sha/sph_sha2.h"
#if defined(__AES__)
#include "algo/echo/aes_ni/hash_api.h"
@@ -53,11 +49,7 @@ typedef struct {
hashState_luffa luffa;
cubehashParam cubehash;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_hamsi512_context hamsi;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
@@ -86,11 +78,6 @@ void init_sonoa_ctx()
init_luffa( &sonoa_ctx.luffa, 512 );
cubehashInit( &sonoa_ctx.cubehash, 512, 16, 32 );
sph_shavite512_init( &sonoa_ctx.shavite );
#if defined(__aarch64__)
sph_simd512_init( &sonoa_ctx.simd );
#else
init_sd( &sonoa_ctx.simd, 512 );
#endif
sph_hamsi512_init( &sonoa_ctx.hamsi );
sph_shabal512_init( &sonoa_ctx.shabal );
sph_whirlpool_init( &sonoa_ctx.whirlpool );
@@ -134,13 +121,7 @@ int sonoa_hash( void *state, const void *input, int thr_id )
sph_shavite512(&ctx.shavite, hash, 64);
sph_shavite512_close(&ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
update_final_echo ( &ctx.echo, (BitSequence *)hash,
@@ -189,13 +170,7 @@ int sonoa_hash( void *state, const void *input, int thr_id )
sph_shavite512(&ctx.shavite, hash, 64);
sph_shavite512_close(&ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
@@ -249,13 +224,7 @@ int sonoa_hash( void *state, const void *input, int thr_id )
sph_shavite512(&ctx.shavite, hash, 64);
sph_shavite512_close(&ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
@@ -318,13 +287,7 @@ int sonoa_hash( void *state, const void *input, int thr_id )
sph_shavite512(&ctx.shavite, hash, 64);
sph_shavite512_close(&ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
@@ -410,13 +373,7 @@ int sonoa_hash( void *state, const void *input, int thr_id )
sph_shavite512(&ctx.shavite, hash, 64);
sph_shavite512_close(&ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512_init( &ctx.simd );
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
simd_full( &ctx.simd, hash, hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
@@ -483,13 +440,7 @@ int sonoa_hash( void *state, const void *input, int thr_id )
sph_shavite512(&ctx.shavite, hash, 64);
sph_shavite512_close(&ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512_init( &ctx.simd );
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
simd_full( &ctx.simd, hash, hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
init_echo( &ctx.echo, 512 );
@@ -527,7 +478,6 @@ int sonoa_hash( void *state, const void *input, int thr_id )
sph_whirlpool_close(&ctx.whirlpool, hash);
if ( work_restart[thr_id].restart ) return 0;
//
sph_bmw512_init( &ctx.bmw);
sph_bmw512(&ctx.bmw, hash, 64);
@@ -565,13 +515,7 @@ int sonoa_hash( void *state, const void *input, int thr_id )
sph_shavite512(&ctx.shavite, hash, 64);
sph_shavite512_close(&ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512_init( &ctx.simd );
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
simd_full( &ctx.simd, hash, hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
init_echo( &ctx.echo, 512 );

View File

@@ -18,11 +18,7 @@
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/haval/sph-haval.h"
#include "algo/cubehash/cubehash_sse2.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#include "algo/sha/sph_sha2.h"
#if defined(__AES__)
#include "algo/fugue/fugue-aesni.h"
@@ -34,7 +30,7 @@
#include "algo/fugue/sph_fugue.h"
#endif
#include "algo/blake/sph_blake.h"
#include "algo/cubehash/sph_cubehash.h"
//#include "algo/cubehash/sph_cubehash.h"
#include "algo/luffa/sph_luffa.h"
@@ -63,17 +59,9 @@ union _x17_context_overlay
#else
hashState_luffa luffa;
#endif
//#if defined(__aarch64__)
// sph_cubehash512_context cube;
//#else
cubehashParam cube;
//#endif
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_hamsi512_context hamsi;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
@@ -127,26 +115,13 @@ int x17_hash(void *output, const void *input, int thr_id )
luffa_full( &ctx.luffa, hash, 512, hash, 64 );
#endif
//#if defined(__aarch64__)
// sph_cubehash512_init(&ctx.cube);
// sph_cubehash512(&ctx.cube, (const void*) hash, 64);
// sph_cubehash512_close(&ctx.cube, hash);
//#else
cubehash_full( &ctx.cube, hash, 512, hash, 64 );
//#endif
sph_shavite512_init( &ctx.shavite );
sph_shavite512( &ctx.shavite, hash, 64);
sph_shavite512_close( &ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512_init( &ctx.simd );
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
simd_full( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, 512 );
#endif
simd512_ctx( &ctx.simd, hash, hash, 64 );
#if defined(__AES__)
echo_full( &ctx.echo, (BitSequence *)hash, 512,

View File

@@ -17,11 +17,7 @@
#include "algo/shabal/sph_shabal.h"
#include "algo/whirlpool/sph_whirlpool.h"
#include "algo/haval/sph-haval.h"
#if defined(__aarch64__)
#include "algo/simd/sph_simd.h"
#else
#include "algo/simd/nist.h"
#endif
#include "algo/simd/simd-hash-2way.h"
#include "algo/cubehash/cubehash_sse2.h"
#include "algo/sha/sph_sha2.h"
#if defined(__AES__)
@@ -45,11 +41,7 @@ typedef struct {
hashState_luffa luffa;
cubehashParam cubehash;
sph_shavite512_context shavite;
#if defined(__aarch64__)
sph_simd512_context simd;
#else
hashState_sd simd;
#endif
simd512_context simd;
sph_hamsi512_context hamsi;
sph_shabal512_context shabal;
sph_whirlpool_context whirlpool;
@@ -78,11 +70,6 @@ void init_xevan_ctx()
init_luffa( &xevan_ctx.luffa, 512 );
cubehashInit( &xevan_ctx.cubehash, 512, 16, 32 );
sph_shavite512_init( &xevan_ctx.shavite );
#if defined(__aarch64__)
sph_simd512_init( &xevan_ctx.simd );
#else
init_sd( &xevan_ctx.simd, 512 );
#endif
sph_hamsi512_init( &xevan_ctx.hamsi );
sph_shabal512_init( &xevan_ctx.shabal );
sph_whirlpool_init( &xevan_ctx.whirlpool );
@@ -137,13 +124,7 @@ int xevan_hash(void *output, const void *input, int thr_id )
sph_shavite512(&ctx.shavite, hash, dataLen);
sph_shavite512_close(&ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512( &ctx.simd, (const void*) hash, dataLen );
sph_simd512_close( &ctx.simd, hash );
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, dataLen*8 );
#endif
simd512_ctx( &ctx.simd, hash, hash, dataLen );
#if defined(__AES__)
update_final_echo( &ctx.echo, (BitSequence *) hash,
@@ -210,13 +191,14 @@ int xevan_hash(void *output, const void *input, int thr_id )
sph_shavite512(&ctx.shavite, hash, dataLen);
sph_shavite512_close(&ctx.shavite, hash);
#if defined(__aarch64__)
sph_simd512(&ctx.simd, (const void*) hash, 64);
sph_simd512_close(&ctx.simd, hash);
#else
update_final_sd( &ctx.simd, (BitSequence *)hash,
(const BitSequence *)hash, dataLen*8 );
#endif
simd512_ctx( &ctx.simd, hash, hash, dataLen );
//#if defined(__aarch64__)
// sph_simd512(&ctx.simd, (const void*) hash, 64);
// sph_simd512_close(&ctx.simd, hash);
//#else
// update_final_sd( &ctx.simd, (BitSequence *)hash,
// (const BitSequence *)hash, dataLen*8 );
//#endif
#if defined(__AES__)
update_final_echo( &ctx.echo, (BitSequence *) hash,

View File

@@ -18,7 +18,6 @@
#include "algo/cubehash/cube-hash-2way.h"
#include "algo/shavite/shavite-hash-2way.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/nist.h"
#include "algo/simd/simd-hash-2way.h"
#include "algo/fugue/fugue-aesni.h"
#include "algo/whirlpool/sph_whirlpool.h"

View File

@@ -1,14 +0,0 @@
#!/bin/bash
# Linux build
make distclean || echo clean
rm -f config.status
./autogen.sh || echo done
CFLAGS="-O3 -march=native -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
strip -s cpuminer

View File

@@ -15,51 +15,43 @@ rm -f config.status
./autogen.sh || echo done
CFLAGS="-O3 -march=armv9-a+crypto+sha3 -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-armv9-crypto-sha3
make clean || echo clean
CFLAGS="-O3 -march=armv9-a+crypto -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-armv9-crypto
make clean || echo clean
CFLAGS="-O3 -march=armv9-a -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-armv9
# SVE2 available in armv8.5
make clean || echo clean
CFLAGS="-O3 -march=armv8.5-a+crypto+sha3+sve2 -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-armv8.5-crypto-sha3-sve2
# SHA3 available in armv8.4
make clean || echo clean
CFLAGS="-O3 -march=armv8.4-a+crypto+sha3 -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-armv8.4-crypto-sha3
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=armv8-a+crypto -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-armv8-crypto
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=armv8-a -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
strip -s cpuminer
mv cpuminer cpuminer-armv8
make clean || echo clean
rm -f config.status
CFLAGS="-O3 -march=native -Wall -flax-vector-conversions" ./configure --with-curl
make -j $(nproc)
strip -s cpuminer

View File

@@ -1,10 +0,0 @@
#!/bin/bash
#
# Compile on Windows using MSYS2 and MinGW.
make distclean || echo clean
rm -f config.status
./autogen.sh || echo done
CFLAGS="-O3 -march=native -Wall -D_WIN32_WINNT=0x0601" ./configure --with-curl
make -j $(nproc)
strip -s cpuminer

View File

@@ -1,20 +1,9 @@
#!/bin/bash
#if [ "$OS" = "Windows_NT" ]; then
# ./mingw64.sh
# exit 0
#fi
# Linux build
#!/bin/sh
make distclean || echo clean
rm -f config.status
./autogen.sh || echo done
#CFLAGS="-O3 -march=native -Wall" ./configure --with-curl --with-crypto=$HOME/usr
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
make -j $(nproc)
strip -s cpuminer
#strip -s cpuminer

View File

@@ -1,4 +1,4 @@
#!/bin/bash
#!/bin/sh
#
# make clean and rm all the targetted executables.

View File

@@ -3,7 +3,7 @@
#ifdef WIN32
#if _WIN32_WINNT==0x0601 // Windows 7
#if _WIN32_WINNT>=0x0601 // Windows 7
#define WINDOWS_CPU_GROUPS_ENABLED 1
#endif

2090
configure vendored

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [24.7])
AC_INIT([cpuminer-opt], [25.2])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
@@ -42,14 +42,11 @@ AC_FUNC_ALLOCA
AC_CHECK_FUNCS([getopt_long])
case $target in
i*86-*-*)
have_x86=true
;;
x86_64-*-*|amd64-*-*)
have_x86_64=true
;;
arm*-*-*)
have_arm=true
aarch64*-*-*|arm64*-*-*)
have_arm64=true
;;
powerpc*-*-*)
have_ppc=true
@@ -65,51 +62,18 @@ case $target in
PTHREAD_FLAGS=""
WS2_LIBS="-lws2_32"
;;
*-apple-*)
have_apple=true
;;
esac
AC_ARG_ENABLE([assembly],
AS_HELP_STRING([--disable-assembly], [disable assembly-language routines]))
if test x$enable_assembly != xno; then
AC_DEFINE([USE_ASM], [1], [Define to 1 if assembly routines are wanted.])
fi
if test x$enable_assembly != xno -a x$have_x86_64 = xtrue
then
AC_MSG_CHECKING(whether we can compile AVX code)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vmovdqa %ymm0, %ymm1");])],
AC_DEFINE(USE_AVX, 1, [Define to 1 if AVX assembly is available.])
AC_MSG_RESULT(yes)
AC_MSG_CHECKING(whether we can compile XOP code)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vprotd \$7, %xmm0, %xmm1");])],
AC_DEFINE(USE_XOP, 1, [Define to 1 if XOP assembly is available.])
AC_MSG_RESULT(yes)
,
AC_MSG_RESULT(no)
AC_MSG_WARN([The assembler does not support the XOP instruction set.])
)
AC_MSG_CHECKING(whether we can compile AVX2 code)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vpaddd %ymm0, %ymm1, %ymm2");])],
AC_DEFINE(USE_AVX2, 1, [Define to 1 if AVX2 assembly is available.])
AC_MSG_RESULT(yes)
AC_MSG_CHECKING(whether we can compile AVX512 code)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vpaddd %zmm0, %zmm1, %zmm2{%k1}");])],
AC_DEFINE(USE_AVX512, 1, [Define to 1 if AVX512 assembly is available.])
AC_MSG_RESULT(yes)
,
AC_MSG_RESULT(no)
AC_MSG_WARN([The assembler does not support the AVX512 instruction set.])
)
,
AC_MSG_RESULT(no)
AC_MSG_WARN([The assembler does not support the AVX2 instruction set.])
)
,
AC_MSG_RESULT(no)
AC_MSG_WARN([The assembler does not support the AVX instruction set.])
)
fi
# jansson test fails on Linux/Mingw, handled in Makefile.am.
AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS="-lpthread",
@@ -118,9 +82,6 @@ AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS="-lpthread",
AC_CHECK_LIB([pthreadGC], [pthread_create], PTHREAD_LIBS="-lpthreadGC"
))))
#LDFLAGS="$PTHREAD_LDFLAGS $LDFLAGS"
# PTHREAD_LIBS="$PTHREAD_LIBS"
AC_MSG_CHECKING(whether __uint128_t is supported)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([static __uint128_t i = 100;])],
AC_DEFINE(USE_INT128, 1, [Define if __uint128_t is available])
@@ -133,16 +94,10 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([static __uint128_t i = 100;])],
AM_CONDITIONAL([WANT_JANSSON], [test x$request_jansson = xtrue])
AM_CONDITIONAL([HAVE_WINDOWS], [test x$have_win32 = xtrue])
AM_CONDITIONAL([USE_ASM], [test x$enable_assembly != xno])
AM_CONDITIONAL([ARCH_x86], [test x$have_x86 = xtrue])
AM_CONDITIONAL([ARCH_x86_64], [test x$have_x86_64 = xtrue])
AM_CONDITIONAL([ARCH_ARM], [test x$have_arm = xtrue])
AM_CONDITIONAL([ARCH_ARM64], [test x$have_arm64 = xtrue])
AM_CONDITIONAL([MINGW], [test "x$OS" = "xWindows_NT"])
if test x$request_jansson = xtrue ; then
JANSSON_LIBS="compat/jansson/libjansson.a"
else
JANSSON_LIBS=-ljansson
fi
AM_CONDITIONAL([HAVE_APPLE], [test x$have_apple = xtrue])
# libcurl install path (for mingw : --with-curl=/usr/local)
AC_ARG_WITH([curl],
@@ -155,25 +110,10 @@ if test -n "$with_curl" ; then
LIBCURL="-lcurl -lz"
fi
# SSL install path (for mingw : --with-crypto=/usr/local/ssl)
AC_ARG_WITH([crypto],
[ --with-crypto=PATH prefix where openssl crypto is installed [default=/usr]])
if test -n "$with_crypto" ; then
LIBCURL_CFLAGS="$LIBCURL_CFLAGS -I$with_crypto/include"
LIBCURL_CPPFLAGS="$LIBCURL_CPPFLAGS -I$with_crypto/include"
LIBCURL_LDFLAGS="-L$with_crypto/lib $LIBCURL_LDFLAGS"
LIBCURL="$LIBCURL -lssl -lcrypto"
fi
CFLAGS="$CFLAGS $LIBCURL_CFLAGS"
CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS"
LDFLAGS="$LDFLAGS $LIBCURL_LDFLAGS"
#AC_CHECK_LIB([z],[gzopen],[],[])
#AC_CHECK_LIB([crypto],[OPENSSL_init], crypto=yes, [AC_MSG_ERROR([OpenSSL crypto library required])])
#AC_CHECK_LIB([ssl],[SSL_new], ssl=yes, ssl=no)
# AC_CHECK_LIB([curl], [curl_multi_timeout],
# have_libcurl=yes,
# have_libcurl=no AC_MSG_ERROR([curl library required])

View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 24.7.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 25.2.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -608,8 +608,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='24.7'
PACKAGE_STRING='cpuminer-opt 24.7'
PACKAGE_VERSION='25.2'
PACKAGE_STRING='cpuminer-opt 25.2'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -657,14 +657,14 @@ JANSSON_LIBS
LIBCURL_CPPFLAGS
LIBCURL_CFLAGS
LIBCURL
HAVE_APPLE_FALSE
HAVE_APPLE_TRUE
MINGW_FALSE
MINGW_TRUE
ARCH_ARM_FALSE
ARCH_ARM_TRUE
ARCH_ARM64_FALSE
ARCH_ARM64_TRUE
ARCH_x86_64_FALSE
ARCH_x86_64_TRUE
ARCH_x86_FALSE
ARCH_x86_TRUE
USE_ASM_FALSE
USE_ASM_TRUE
HAVE_WINDOWS_FALSE
@@ -796,7 +796,6 @@ enable_maintainer_mode
enable_dependency_tracking
enable_assembly
with_curl
with_crypto
'
ac_precious_vars='build_alias
host_alias
@@ -1360,7 +1359,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 24.7 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 25.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1432,7 +1431,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 24.7:";;
short | recursive ) echo "Configuration of cpuminer-opt 25.2:";;
esac
cat <<\_ACEOF
@@ -1455,7 +1454,6 @@ Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
--with-curl=PATH prefix where curl is installed default=/usr
--with-crypto=PATH prefix where openssl crypto is installed default=/usr
Some influential environment variables:
CC C compiler command
@@ -1538,7 +1536,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 24.7
cpuminer-opt configure 25.2
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1985,7 +1983,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 24.7, which was
It was created by cpuminer-opt $as_me 25.2, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@@ -3593,7 +3591,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='24.7'
VERSION='25.2'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -5810,11 +5808,11 @@ if test x$ac_prog_cxx_stdcxx = xno
then :
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5
printf %s "checking for $CXX option to enable C++11 features... " >&6; }
if test ${ac_cv_prog_cxx_11+y}
if test ${ac_cv_prog_cxx_cxx11+y}
then :
printf %s "(cached) " >&6
else $as_nop
ac_cv_prog_cxx_11=no
ac_cv_prog_cxx_cxx11=no
ac_save_CXX=$CXX
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -5856,11 +5854,11 @@ if test x$ac_prog_cxx_stdcxx = xno
then :
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5
printf %s "checking for $CXX option to enable C++98 features... " >&6; }
if test ${ac_cv_prog_cxx_98+y}
if test ${ac_cv_prog_cxx_cxx98+y}
then :
printf %s "(cached) " >&6
else $as_nop
ac_cv_prog_cxx_98=no
ac_cv_prog_cxx_cxx98=no
ac_save_CXX=$CXX
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -6502,33 +6500,30 @@ then :
fi
#MINGW_TARGET=`$CC -dumpmachine 2>&1`
#case $MINGW_TARGET in
case $target in
arm*-*-*)
have_arm=true
;;
i*86-*-mingw*)
have_x86=true
have_win32=true
CFLAGS="-Icompat/pthreads $CFLAGS"
PTHREAD_LDFLAGS="-Lcompat/pthreads/x86"
WS2_LIBS="-lws2_32"
;;
x86_64-*-mingw*|amd64-*-mingw*)
have_x86_64=true
have_win32=true
CFLAGS="-Icompat/pthreads $CFLAGS"
PTHREAD_LDFLAGS="-Lcompat/pthreads/x64"
# SHOULD BE AT END! after -lcrypto #
WS2_LIBS="-L/mingw/x86_64-w64-mingw32/lib -lws2_32"
;;
i*86-*-*)
have_x86=true
;;
x86_64-*-*|amd64-*-*)
have_x86_64=true
;;
aarch64*-*-*|arm64*-*-*)
have_arm64=true
;;
powerpc*-*-*)
have_ppc=true
;;
esac
PTHREAD_FLAGS="-pthread"
WS2_LIBS=""
case $target in
*-*-mingw*)
have_win32=true
PTHREAD_FLAGS=""
WS2_LIBS="-lws2_32"
;;
*-apple-*)
have_apple=true
;;
esac
# Check whether --enable-assembly was given.
@@ -6543,126 +6538,7 @@ printf "%s\n" "#define USE_ASM 1" >>confdefs.h
fi
if test x$enable_assembly != xno -a x$have_x86_64 = xtrue
then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX code" >&5
printf %s "checking whether we can compile AVX code... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
main (void)
{
asm ("vmovdqa %ymm0, %ymm1");
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"
then :
printf "%s\n" "#define USE_AVX 1" >>confdefs.h
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
printf "%s\n" "yes" >&6; }
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we can compile XOP code" >&5
printf %s "checking whether we can compile XOP code... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
main (void)
{
asm ("vprotd \$7, %xmm0, %xmm1");
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"
then :
printf "%s\n" "#define USE_XOP 1" >>confdefs.h
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
printf "%s\n" "yes" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the XOP instruction set." >&5
printf "%s\n" "$as_me: WARNING: The assembler does not support the XOP instruction set." >&2;}
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX2 code" >&5
printf %s "checking whether we can compile AVX2 code... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
main (void)
{
asm ("vpaddd %ymm0, %ymm1, %ymm2");
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"
then :
printf "%s\n" "#define USE_AVX2 1" >>confdefs.h
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
printf "%s\n" "yes" >&6; }
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we can compile AVX512 code" >&5
printf %s "checking whether we can compile AVX512 code... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
main (void)
{
asm ("vpaddd %zmm0, %zmm1, %zmm2{%k1}");
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"
then :
printf "%s\n" "#define USE_AVX512 1" >>confdefs.h
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
printf "%s\n" "yes" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX512 instruction set." >&5
printf "%s\n" "$as_me: WARNING: The assembler does not support the AVX512 instruction set." >&2;}
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX2 instruction set." >&5
printf "%s\n" "$as_me: WARNING: The assembler does not support the AVX2 instruction set." >&2;}
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: The assembler does not support the AVX instruction set." >&5
printf "%s\n" "$as_me: WARNING: The assembler does not support the AVX instruction set." >&2;}
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
fi
# jansson test fails on Linux/Mingw, handled in Makefile.am.
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for json_loads in -ljansson" >&5
printf %s "checking for json_loads in -ljansson... " >&6; }
if test ${ac_cv_lib_jansson_json_loads+y}
@@ -6706,51 +6582,7 @@ else $as_nop
fi
# GC2 for GNU static
if test "x$have_win32" = "xtrue" ; then
# MinGW
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5
printf %s "checking for pthread_create in -lpthread... " >&6; }
if test ${ac_cv_lib_pthread_pthread_create+y}
then :
printf %s "(cached) " >&6
else $as_nop
ac_check_lib_save_LIBS=$LIBS
LIBS="-lpthread $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
char pthread_create ();
int
main (void)
{
return pthread_create ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"
then :
ac_cv_lib_pthread_pthread_create=yes
else $as_nop
ac_cv_lib_pthread_pthread_create=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5
printf "%s\n" "$ac_cv_lib_pthread_pthread_create" >&6; }
if test "x$ac_cv_lib_pthread_pthread_create" = xyes
then :
PTHREAD_LIBS="-lpthreadGC2"
fi
else
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5
printf %s "checking for pthread_create in -lpthread... " >&6; }
if test ${ac_cv_lib_pthread_pthread_create+y}
then :
@@ -6788,12 +6620,132 @@ printf "%s\n" "$ac_cv_lib_pthread_pthread_create" >&6; }
if test "x$ac_cv_lib_pthread_pthread_create" = xyes
then :
PTHREAD_LIBS="-lpthread"
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthreadGC2" >&5
printf %s "checking for pthread_create in -lpthreadGC2... " >&6; }
if test ${ac_cv_lib_pthreadGC2_pthread_create+y}
then :
printf %s "(cached) " >&6
else $as_nop
ac_check_lib_save_LIBS=$LIBS
LIBS="-lpthreadGC2 $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
char pthread_create ();
int
main (void)
{
return pthread_create ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"
then :
ac_cv_lib_pthreadGC2_pthread_create=yes
else $as_nop
ac_cv_lib_pthreadGC2_pthread_create=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthreadGC2_pthread_create" >&5
printf "%s\n" "$ac_cv_lib_pthreadGC2_pthread_create" >&6; }
if test "x$ac_cv_lib_pthreadGC2_pthread_create" = xyes
then :
PTHREAD_LIBS="-lpthreadGC2"
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthreadGC1" >&5
printf %s "checking for pthread_create in -lpthreadGC1... " >&6; }
if test ${ac_cv_lib_pthreadGC1_pthread_create+y}
then :
printf %s "(cached) " >&6
else $as_nop
ac_check_lib_save_LIBS=$LIBS
LIBS="-lpthreadGC1 $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
char pthread_create ();
int
main (void)
{
return pthread_create ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"
then :
ac_cv_lib_pthreadGC1_pthread_create=yes
else $as_nop
ac_cv_lib_pthreadGC1_pthread_create=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthreadGC1_pthread_create" >&5
printf "%s\n" "$ac_cv_lib_pthreadGC1_pthread_create" >&6; }
if test "x$ac_cv_lib_pthreadGC1_pthread_create" = xyes
then :
PTHREAD_LIBS="-lpthreadGC1"
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthreadGC" >&5
printf %s "checking for pthread_create in -lpthreadGC... " >&6; }
if test ${ac_cv_lib_pthreadGC_pthread_create+y}
then :
printf %s "(cached) " >&6
else $as_nop
ac_check_lib_save_LIBS=$LIBS
LIBS="-lpthreadGC $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
char pthread_create ();
int
main (void)
{
return pthread_create ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"
then :
ac_cv_lib_pthreadGC_pthread_create=yes
else $as_nop
ac_cv_lib_pthreadGC_pthread_create=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthreadGC_pthread_create" >&5
printf "%s\n" "$ac_cv_lib_pthreadGC_pthread_create" >&6; }
if test "x$ac_cv_lib_pthreadGC_pthread_create" = xyes
then :
PTHREAD_LIBS="-lpthreadGC"
fi
fi
fi
fi
LDFLAGS="$PTHREAD_LDFLAGS $LDFLAGS"
# PTHREAD_LIBS="$PTHREAD_LIBS"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether __uint128_t is supported" >&5
printf %s "checking whether __uint128_t is supported... " >&6; }
@@ -6848,14 +6800,6 @@ else
USE_ASM_FALSE=
fi
if test x$have_x86 = xtrue; then
ARCH_x86_TRUE=
ARCH_x86_FALSE='#'
else
ARCH_x86_TRUE='#'
ARCH_x86_FALSE=
fi
if test x$have_x86_64 = xtrue; then
ARCH_x86_64_TRUE=
ARCH_x86_64_FALSE='#'
@@ -6864,12 +6808,12 @@ else
ARCH_x86_64_FALSE=
fi
if test x$have_arm = xtrue; then
ARCH_ARM_TRUE=
ARCH_ARM_FALSE='#'
if test x$have_arm64 = xtrue; then
ARCH_ARM64_TRUE=
ARCH_ARM64_FALSE='#'
else
ARCH_ARM_TRUE='#'
ARCH_ARM_FALSE=
ARCH_ARM64_TRUE='#'
ARCH_ARM64_FALSE=
fi
if test "x$OS" = "xWindows_NT"; then
@@ -6880,13 +6824,15 @@ else
MINGW_FALSE=
fi
if test x$request_jansson = xtrue ; then
JANSSON_LIBS="compat/jansson/libjansson.a"
if test x$have_apple = xtrue; then
HAVE_APPLE_TRUE=
HAVE_APPLE_FALSE='#'
else
JANSSON_LIBS=-ljansson
HAVE_APPLE_TRUE='#'
HAVE_APPLE_FALSE=
fi
# libcurl install path (for mingw : --with-curl=/usr/local)
# Check whether --with-curl was given.
@@ -6903,30 +6849,10 @@ if test -n "$with_curl" ; then
LIBCURL="-lcurl -lz"
fi
# SSL install path (for mingw : --with-crypto=/usr/local/ssl)
# Check whether --with-crypto was given.
if test ${with_crypto+y}
then :
withval=$with_crypto;
fi
if test -n "$with_crypto" ; then
LIBCURL_CFLAGS="$LIBCURL_CFLAGS -I$with_crypto/include"
LIBCURL_CPPFLAGS="$LIBCURL_CPPFLAGS -I$with_crypto/include"
LIBCURL_LDFLAGS="-L$with_crypto/lib $LIBCURL_LDFLAGS"
LIBCURL="$LIBCURL -lssl -lcrypto"
fi
CFLAGS="$CFLAGS $LIBCURL_CFLAGS"
CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS"
LDFLAGS="$LDFLAGS $LIBCURL_LDFLAGS"
#AC_CHECK_LIB([z],[gzopen],[],[])
#AC_CHECK_LIB([crypto],[OPENSSL_init], crypto=yes, [AC_MSG_ERROR([OpenSSL crypto library required])])
#AC_CHECK_LIB([ssl],[SSL_new], ssl=yes, ssl=no)
# AC_CHECK_LIB([curl], [curl_multi_timeout],
# have_libcurl=yes,
# have_libcurl=no AC_MSG_ERROR([curl library required])
@@ -7103,22 +7029,22 @@ if test -z "${USE_ASM_TRUE}" && test -z "${USE_ASM_FALSE}"; then
as_fn_error $? "conditional \"USE_ASM\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${ARCH_x86_TRUE}" && test -z "${ARCH_x86_FALSE}"; then
as_fn_error $? "conditional \"ARCH_x86\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${ARCH_x86_64_TRUE}" && test -z "${ARCH_x86_64_FALSE}"; then
as_fn_error $? "conditional \"ARCH_x86_64\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${ARCH_ARM_TRUE}" && test -z "${ARCH_ARM_FALSE}"; then
as_fn_error $? "conditional \"ARCH_ARM\" was never defined.
if test -z "${ARCH_ARM64_TRUE}" && test -z "${ARCH_ARM64_FALSE}"; then
as_fn_error $? "conditional \"ARCH_ARM64\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${MINGW_TRUE}" && test -z "${MINGW_FALSE}"; then
as_fn_error $? "conditional \"MINGW\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${HAVE_APPLE_TRUE}" && test -z "${HAVE_APPLE_FALSE}"; then
as_fn_error $? "conditional \"HAVE_APPLE\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
: "${CONFIG_STATUS=./config.status}"
ac_write_fail=0
@@ -7509,7 +7435,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 24.7, which was
This file was extended by cpuminer-opt $as_me 25.2, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -7577,7 +7503,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
cpuminer-opt config.status 24.7
cpuminer-opt config.status 25.2
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

View File

@@ -206,7 +206,7 @@ static uint32_t last_block_height = 0;
static double highest_share = 0; // highest accepted share diff
static double lowest_share = 9e99; // lowest accepted share diff
static double last_targetdiff = 0.;
#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32) || defined(__APPLE__))
static uint32_t hi_temp = 0;
static uint32_t prev_temp = 0;
#endif
@@ -286,15 +286,15 @@ static inline void drop_policy(void) { }
static void affine_to_cpu( struct thr_info *thr )
{
int thread = thr->id;
unsigned long last_error;
bool ok;
unsigned long last_error = 0;
bool ok = true;
#if defined(WINDOWS_CPU_GROUPS_ENABLED)
unsigned long group_size = GetActiveProcessorCount( 0 );
unsigned long group = thread / group_size;
unsigned long cpu = thread_affinity_map[ thread % group_size ];
GROUP_AFFINITY affinity;
GROUP_AFFINITY affinity = {0};
affinity.Group = group;
affinity.Mask = 1ULL << cpu;
@@ -320,8 +320,7 @@ static void affine_to_cpu( struct thr_info *thr )
{
last_error = GetLastError();
if ( !thread )
applog( LOG_WARNING, "Set affinity returned error 0x%x for thread %d",
last_error, thread );
applog( LOG_WARNING, "Set affinity returned error 0x%x", last_error );
}
}
@@ -992,19 +991,19 @@ void report_summary_log( bool force )
if ( rejected_share_count > 10 )
{
if ( rejected_share_count > ( submitted_share_count * .5 ) )
if ( rejected_share_count > ( submitted_share_count / 2 ) )
{
applog(LOG_ERR,"Excessive rejected share rate, exiting...");
exit(1);
}
else if ( rejected_share_count > ( submitted_share_count * .1 ) )
else if ( rejected_share_count > ( submitted_share_count / 10 ) )
applog(LOG_WARNING,"High rejected share rate, check settings.");
}
gettimeofday( &now, NULL );
timeval_subtract( &et, &now, &five_min_start );
#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32) || defined(__APPLE__))
// Display CPU temperature and clock rate.
int curr_temp = cpu_temp(0);
@@ -1013,8 +1012,9 @@ void report_summary_log( bool force )
if ( !opt_quiet || ( curr_temp >= 80 ) )
{
int wait_time = curr_temp >= 90 ? 5 : curr_temp >= 80 ? 30 :
curr_temp >= 70 ? 60 : 120;
int wait_time = curr_temp >= 90 ? 5
: curr_temp >= 80 ? 30
: curr_temp >= 70 ? 60 : 120;
timeval_subtract( &diff, &now, &cpu_temp_time );
if ( ( diff.tv_sec > wait_time )
|| ( ( curr_temp > prev_temp ) && ( curr_temp >= 75 ) ) )
@@ -1912,7 +1912,7 @@ static bool wanna_mine(int thr_id)
{
bool state = true;
#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32))
#if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32) || defined(__APPLE__))
if (opt_max_temp > 0.0)
{
@@ -2400,7 +2400,7 @@ static void *miner_thread( void *userdata )
{
scale_hash_for_display( &hashrate, hr_units );
sprintf( hr, "%.2f", hashrate );
#if (defined(_WIN64) || defined(__WINDOWS__) || defined(_WIN32))
#if (defined(_WIN64) || defined(__WINDOWS__) || defined(_WIN32) || defined(__APPLE__))
applog( LOG_NOTICE, "Total: %s %sH/s", hr, hr_units );
#else
float lo_freq = 0., hi_freq = 0.;
@@ -2912,6 +2912,7 @@ static bool cpu_capability( bool display_only )
sw_arm_arch = __ARM_ARCH;
#endif
#endif
// x86_64 only
#if defined(__SSE2__)
sw_has_sse2 = true;
@@ -2940,9 +2941,10 @@ static bool cpu_capability( bool display_only )
#if defined(__AVX10_1_512__)
sw_has_avx10_512 = true;
#endif
// x86_64 or AArch64
#if defined(__AES__) || defined(__ARM_FEATURE_AES)
sw_has_aes = true;
sw_has_aes = true;
#endif
#ifdef __VAES__
sw_has_vaes = true;
@@ -2953,6 +2955,7 @@ static bool cpu_capability( bool display_only )
#if defined(__SHA512__) || defined(__ARM_FEATURE_SHA512)
sw_has_sha512 = true;
#endif
// AArch64 only
#if defined(__ARM_NEON)
sw_has_neon = true;
@@ -2970,27 +2973,33 @@ static bool cpu_capability( bool display_only )
sw_has_sme2 = true;
#endif
// CPU
cpu_brand_string( cpu_brand );
printf( "CPU: %s\n", cpu_brand );
printf("SW built on " __DATE__
#ifdef _MSC_VER
" with VC++ 2013\n");
// Build
printf( "SW built on " __DATE__
#if defined(__clang__)
" with CLANG-%d.%d.%d", __clang_major__, __clang_minor__,
__clang_patchlevel__ );
#elif defined(__GNUC__)
" with GCC-");
printf("%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
#else
printf("\n");
" with GCC-%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ );
#endif
// OS
#if defined(__linux)
printf(" Linux\n");
#elif defined(WIN32)
printf(" Windows\n");
printf(" Windows");
#if defined(__MINGW64__)
printf(" MinGW-w64\n");
#else
printf("\n");
#endif
#elif defined(__APPLE__)
printf(" MacOS\n");
#elif defined(__unix__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
printf(" Unix\n");
#elif defined(__bsd__) || defined(__unix__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
printf(" BSD/Unix\n");
#else
printf("\n");
#endif
@@ -3050,7 +3059,7 @@ static bool cpu_capability( bool display_only )
if ( !display_only )
{
printf("\nAlgo features: ");
printf("\nAlgo features:");
if ( algo_features == EMPTY_SET ) printf( " None" );
else
{
@@ -3058,7 +3067,7 @@ static bool cpu_capability( bool display_only )
else if ( algo_has_avx2 ) printf( " AVX2 " );
else if ( algo_has_sse42 ) printf( " SSE4.2" );
else if ( algo_has_sse2 ) printf( " SSE2 " );
if ( algo_has_neon ) printf( " NEON " );
if ( algo_has_neon ) printf( " NEON" );
if ( algo_has_vaes ) printf( " VAES" );
else if ( algo_has_aes ) printf( " AES" );
if ( algo_has_sha512 ) printf( " SHA512" );
@@ -3103,6 +3112,7 @@ static bool cpu_capability( bool display_only )
return true;
}
/*
void show_version_and_exit(void)
{
printf("\n built on " __DATE__
@@ -3159,12 +3169,11 @@ void show_version_and_exit(void)
printf("\n");
exit(0);
}
*/
void show_usage_and_exit(int status)
{
if (status)
fprintf(stderr, "Try `--help' for more information.\n");
// fprintf(stderr, "Try `" PACKAGE_NAME " --help' for more information.\n");
else
printf(usage);
exit(status);
@@ -3180,7 +3189,6 @@ void parse_arg(int key, char *arg )
{
char *p;
int v, i;
// uint64_t ul;
double d;
switch( key )
@@ -3324,7 +3332,8 @@ void parse_arg(int key, char *arg )
free(rpc_user);
rpc_user = strdup(arg);
break;
case 'o': // url
case 'o': // url
{
char *ap, *hp;
ap = strstr( arg, "://" );
@@ -3389,7 +3398,8 @@ void parse_arg(int key, char *arg )
have_stratum = !opt_benchmark && !strncasecmp( rpc_url, "stratum", 7 );
break;
}
case 'O': // userpass
case 'O': // userpass
p = strchr(arg, ':');
if (!p)
{
@@ -3549,10 +3559,10 @@ void parse_arg(int key, char *arg )
case 1029: // stratum-keepalive
opt_stratum_keepalive = true;
break;
case 'V':
case 'V': // version
display_cpu_capability();
exit(0);
case 'h':
case 'h': // help
show_usage_and_exit(0);
default:
@@ -3691,9 +3701,6 @@ int main(int argc, char *argv[])
{
int cpus = GetActiveProcessorCount( i );
num_cpus += cpus;
// if (opt_debug)
// applog( LOG_INFO, "Found %d CPUs in CPU group %d", cpus, i );
}
#else
@@ -3864,12 +3871,23 @@ int main(int argc, char *argv[])
}
#endif
#if defined(WIN32) && defined(WINDOWS_CPU_GROUPS_ENABLED)
if ( opt_debug || ( !opt_quiet && num_cpugroups > 1 ) )
applog( LOG_INFO, "Found %d CPUs in %d groups",
num_cpus, num_cpugroups );
#if defined(WIN32)
#if defined(_WIN32_WINNT)
if (opt_debug)
applog( LOG_INFO, "_WIN232_WINNT = 0x%04x", _WIN32_WINNT );
#else
if (opt_debug)
applog( LOG_INFO, "_WIN232_WINNT undefined." );
#endif
#if defined(WINDOWS_CPU_GROUPS_ENABLED)
if ( opt_debug || ( !opt_quiet && num_cpugroups > 1 ) )
applog( LOG_INFO, "Found %d CPUs in %d groups",
num_cpus, num_cpugroups );
#endif
#endif
conditional_state = malloc( opt_n_threads * ((sizeof(bool)) ) );
memset( conditional_state, 0, opt_n_threads * ((sizeof(bool)) ) );
@@ -3890,7 +3908,7 @@ int main(int argc, char *argv[])
if ( cpu < num_cpus ) active_cpus++;
}
if ( opt_n_threads > active_cpus )
applog( LOG_WARNING, "Affinity: more threads (%d) than active CPUs (%d)", opt_n_threads, active_cpus );
applog( LOG_WARNING, "More miner threads (%d) than active CPUs in affinity mask (%d)", opt_n_threads, active_cpus );
if ( !opt_quiet )
{
char affinity_mask[64];

10
miner.h
View File

@@ -3,10 +3,7 @@
#include <cpuminer-config.h>
#if !( defined(__SSE2__) || ( defined(__aarch64__) && defined(__ARM_NEON) ) )
#warning "Unknown or unsupported CPU, requires x86_64 with SSE2 or AArch64 with NEON."
#endif
// CPU architecture
#if defined(__x86_64__)
#define USER_AGENT_ARCH "x64" // Intel, AMD x86_64
#elif defined(__aarch64__)
@@ -17,14 +14,15 @@
#define USER_AGENT_ARCH
#endif
// Operating system
// __APPLE__ includes MacOS & IOS, no MacOS only macros found.
#if defined(__linux)
#define USER_AGENT_OS "L" // GNU Linux
#elif defined(WIN32)
#define USER_AGENT_OS "W" // MS Windows
#elif defined(__APPLE__)
#define USER_AGENT_OS "M" // Apple MacOS
// is there a generic BSD macro?
#elif defined(__unix__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
#elif defined(__bsd__) || defined(__unix__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
#define USER_AGENT_OS "U" // BSD unix
#else
#define USER_AGENT_OS

View File

@@ -29,7 +29,6 @@
// is no significant 64 bit vectorization therefore SSE2 is the practical
// minimum for using this code.
//
// MMX: 64 bit vectors (Not used in cpuminer-opt)
// SSE2: 128 bit vectors (64 bit CPUs only, such as Intel Core2.
// AVX2: 256 bit vectors (Starting with Intel Haswell and AMD Ryzen)
// AVX512: 512 bit vectors (Starting with SkylakeX)
@@ -217,9 +216,6 @@
#include "simd-utils/simd-int.h"
// x86_64 MMX 64 bit vectors
#include "simd-utils/simd-64.h"
// x86_64 SSE2 128 bit vectors
#include "simd-utils/simd-128.h"

View File

@@ -1,193 +0,0 @@
#if !defined(SIMD_64_H__)
#define SIMD_64_H__ 1
#if defined(__x86_64__) && defined(__MMX__)
////////////////////////////////////////////////////////////////
//
// 64 bit MMX vectors.
//
// This code is not used anywhere annd likely never will. It's intent was
// to support 2 way parallel hashing using MMX, or NEON for 32 bit hash
// functions, but hasn't been implementedwas never implemented.
//
// MMX is being deprecated by compilers, all intrinsics will be converted to use SSE
// registers and instructions. MMX will still be available using ASM.
// For backward compatibility it's likely the compiler won't allow mixing explicit SSE
// with promoted MMX. It is therefore preferable to implement all 64 bit vector code
// using explicit SSE with the upper 64 bits being ignored.
// Using SSE for 64 bit vectors will complicate loading arrays from memory which will
// always load 128 bits. Odd indexes will need to be extracted from the upper 64 bits
// of the even index SSE register.
// In most cases the exiting 4x32 SSE code can be used with 2 lanes being ignored
// making ths file obsolete.
#define v64_t __m64
#define v64u32_t v64_t
#define v64_load _mm_load_si64
#define v64_store _mm_store_si64
#define v64_64(i64) ((__m64)(i64))
#define v64_32 _mm_set1_pi32
#define v64_16 _mm_set1_pi16
#define v64_8 _mm_set1_pi8
#define v64_add32 _mm_add_pi32
#define v64_add16 _mm_add_pi16
#define v64_add8 _mm_add_pi8
#define v64_mul32 _mm_mullo_pi32
#define v64_mul16 _mm_mullo_pi16
// compare
#define v64_cmpeq32 _mm_cmpeq_epi32
#define v64_cmpeq16 _mm_cmpeq_epi16
#define v64_cmpeq8 _mm_cmpeq_epi8
#define v64_cmpgt32 _mm_cmpgt_epi32
#define v64_cmpgt16 _mm_cmpgt_epi16
#define v64_cmpgt8 _mm_cmpgt_epi8
#define v64_cmplt32 _mm_cmplt_epi32
#define v64_cmplt16 _mm_cmplt_epi16
#define v64_cmplt8 _mm_cmplt_epi8
// bit shift
#define v64_sl32 _mm_slli_epi32
#define v64_sl16 _mm_slli_epi16
#define v64_sl8 _mm_slli_epi8
#define v64_sr32 _mm_srli_epi32
#define v64_sr16 _mm_srli_epi16
#define v64_sr8 _mm_srli_epi8
#define v64_sra32 _mm_srai_epi32
#define v64_sra16 _mm_srai_epi16
#define v64_sra8 _mm_srai_epi8
#define v64_alignr8 _mm_alignr_pi8
#define v64_unpacklo32 _mm_unpacklo_pi32
#define v64_unpackhi32 _mm_unpackhi_pi32
#define v64_unpacklo16 _mm_unpacklo_pi16
#define v64_unpackhi16 _mm_unpacklhi_pi16
#define v64_unpacklo8 _mm_unpacklo_pi8
#define v64_unpackhi8 _mm_unpackhi_pi16
// Pseudo constants
#define v64_zero _mm_setzero_si64()
#define v64_one_64 _mm_set_pi32( 0UL, 1UL )
#define v64_one_32 v64_32( 1UL )
#define v64_one_16 v64_16( 1U )
#define v64_one_8 v64_8( 1U );
#define v64_neg1 v64_32( 0xFFFFFFFFUL )
#define casti_v64(p,i) (((v64_t*)(p))[(i)])
// Bitwise not: ~(a)
//#define mm64_not( a ) _mm_xor_si64( (__m64)a, m64_neg1 )
#define v64_not( a ) ( (v64_t)( ~( (uint64_t)(a) ) )
/*
// Unary negate elements
#define mm64_negate_32( v ) _mm_sub_pi32( m64_zero, v )
#define mm64_negate_16( v ) _mm_sub_pi16( m64_zero, v )
#define mm64_negate_8( v ) _mm_sub_pi8( m64_zero, v )
*/
static inline void v64_memset_zero( __m64 *dst, const int n )
{ for ( int i = 0; i < n; i++ ) dst[i] = v64_zero; }
static inline void v64_memset( __m64 *dst, const __m64 a, const int n )
{ for ( int i = 0; i < n; i++ ) dst[i] = a; }
static inline void v64_memcpy( __m64 *dst, const __m64 *src, const int n )
{ for ( int i = 0; i < n; i ++ ) dst[i] = src[i]; }
#define v64_or _mm_or_si64
#define v64_and _mm_and_si64
#define v64_xor _mm_xor_si64
#define v64_andnot _mm_andnot_si64
#define v64_xor3( v2, v1, v0 ) v64_xor( v2, v64_andnot( v1, v0 ) )
#define v64_xorandnot( v2, v1, v0 ) v64_xor( v2, v64_andnot( v1, v0 ) )
// Rotate bits in packed elements of 64 bit vector
#define v64_rol64( a, n ) \
_mm_or_si64( _mm_slli_si64( a, n ), \
_mm_srli_si64( a, 64-(n) ) )
#define v64_ror64( a, n ) \
_mm_or_si64( _mm_srli_si64( a, n ), \
_mm_slli_si64( a, 64-(n) ) )
#define v64_rol32( a, n ) \
_mm_or_si64( _mm_slli_pi32( a, n ), \
_mm_srli_pi32( a, 32-(n) ) )
#define v64_ror32( a, n ) \
_mm_or_si64( _mm_srli_pi32( a, n ), \
_mm_slli_pi32( a, 32-(n) ) )
#define v64_rol16( a, n ) \
_mm_or_si64( _mm_slli_pi16( a, n ), \
_mm_srli_pi16( a, 16-(n) ) )
#define v64_ror16( a, n ) \
_mm_or_si64( _mm_srli_pi16( a, n ), \
_mm_slli_pi16( a, 16-(n) ) )
// Rotate packed elements accross lanes. Useful for byte swap and byte
// rotation.
#if defined(__SSE__)
// Swap hi & lo 32 bits.
#define v64_swap32( a ) _mm_shuffle_pi16( a, 0x4e )
#define v64_shulfr16( a ) _mm_shuffle_pi16( a, 0x39 )
#define v64_shufll16( a ) _mm_shuffle_pi16( a, 0x93 )
// Swap hi & lo 16 bits of each 32 bit element
#define v64_swap32_16( a ) _mm_shuffle_pi16( a, 0xb1 )
#endif // SSE
#if defined(__SSSE3__)
// Endian byte swap packed elements
#define v64_bswap32( v ) \
_mm_shuffle_pi8( v, (__m64)0x0405060700010203 )
#define v64_bswap16( v ) \
_mm_shuffle_pi8( v, (__m64)0x0607040502030001 );
// Rotate right by c bytes
static inline v64_t v64_shuflr_x8( __m64 v, const int c )
{ return _mm_alignr_pi8( v, v, c ); }
#else
#define v64_bswap32( v ) \
_mm_set_pi32( __builtin_bswap32( ((uint32_t*)&v)[1] ), \
__builtin_bswap32( ((uint32_t*)&v)[0] ) )
#define v64_bswap16( v ) \
_mm_set_pi16( __builtin_bswap16( ((uint16_t*)&v)[3] ), \
__builtin_bswap16( ((uint16_t*)&v)[2] ), \
__builtin_bswap16( ((uint16_t*)&v)[1] ), \
__builtin_bswap16( ((uint16_t*)&v)[0] ) )
#endif // SSSE3
#define v64_blendv( v1, v0, mask ) \
v64_or( v64_and( mask, v1 ), v64_andnot( mask, v0 ) )
#endif // MMX
#endif // SIMD_64_H__

View File

@@ -19,6 +19,9 @@ static inline uint64_t bswap_64( uint64_t a )
return b;
}
// This produces warnings from clang, but its suggested workaround
// "rev32 %w0, %w1\n\t" produced errors instead. GCC doesn't complain and
// it works as is on both.
static inline uint32_t bswap_32( uint32_t a )
{
uint32_t b;

View File

@@ -16,8 +16,8 @@
#include "miner.h"
#include "simd-utils.h"
// missing on mingw on arm
#if defined(__aarch64__) && !defined(WIN32)
// Missing on MinGW, MacOS
#if defined(__aarch64__) && !defined(WIN32) && !defined(__APPLE__)
#define ARM_AUXV
#endif
@@ -28,7 +28,7 @@
#include <sys/prctl.h>
#endif
#ifndef WIN32
#if !(defined(WIN32) || defined(__APPLE__))
// 1035g1: /sys/devices/platform/coretemp.0/hwmon/hwmon3/temp1_input
// 1035g1: /sys/class/hwmon/hwmon1/temp1_input wrong temp
@@ -152,7 +152,7 @@ static inline void linux_cpu_hilo_freq( float *lo, float *hi )
static inline float cpu_temp( int core )
{
#ifdef WIN32
#if defined(WIN32) || defined(__APPLE__)
return 0.;
#else
return linux_cputemp( core );
@@ -161,7 +161,7 @@ static inline float cpu_temp( int core )
static inline uint32_t cpu_clock( int core )
{
#ifdef WIN32
#if defined(WIN32) || defined(__APPLE__)
return 0;
#else
return linux_cpufreq( core );
@@ -280,8 +280,8 @@ static inline int cpu_fanpercent()
#define FMA3_mask (FMA3_Flag|AVX_mask)
#define AVX512_mask (AVX512_VL_Flag|AVX512_BW_Flag|AVX512_DQ_Flag|AVX512_F_Flag)
#if defined(__x86_64__)
static inline void cpuid( unsigned int leaf, unsigned int subleaf,
unsigned int output[4] )
{
@@ -965,7 +965,7 @@ static inline unsigned int avx10_vector_length()
return 0;
}
// ARM SVE vector register length
// ARM SVE vector register length, converted from bytes to bits.
static inline int sve_vector_length()
{
#if defined(ARM_AUXV)

View File

@@ -11,16 +11,11 @@
export LOCAL_LIB="$HOME/usr/lib"
export CONFIGURE_ARGS="--with-curl=$LOCAL_LIB/curl --host=x86_64-w64-mingw32"
#export CONFIGURE_ARGS="--with-curl=$LOCAL_LIB/curl --with-crypto=$LOCAL_LIB/openssl --host=x86_64-w64-mingw32"
export MINGW_LIB="/usr/x86_64-w64-mingw32/lib"
# set correct gcc version
export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
# used by GCC
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs"
#export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl"
# Support for Windows 7 CPU groups, AES sometimes not included in -march
# CPU groups disabled due to incompatibilities between Intel and AMD CPUs.
#export DEFAULT_CFLAGS="-maes -O3 -Wall -D_WIN32_WINNT=0x0601"
export DEFAULT_CFLAGS="-maes -O3 -Wall"
export DEFAULT_CFLAGS_OLD="-O3 -Wall"
@@ -40,7 +35,6 @@ cp $MINGW_LIB/zlib1.dll release/
cp $MINGW_LIB/libwinpthread-1.dll release/
cp $GCC_MINGW_LIB/libstdc++-6.dll release/
cp $GCC_MINGW_LIB/libgcc_s_seh-1.dll release/
#cp ./../libcrypto-1_1-x64.dll release/
cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/
# Start building...