mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.9.1.1
This commit is contained in:
@@ -38,6 +38,16 @@ supported.
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
v3.9.1.1
|
||||||
|
|
||||||
|
Fixed lyra2v3 AVX and below.
|
||||||
|
|
||||||
|
Compiling on Windows using Cygwin now works. Simply use "./build.sh"
|
||||||
|
just like on Linux. It isn't portable therefore the binaries package will
|
||||||
|
continue to use the existing procedure.
|
||||||
|
The Cygwin procedure will be documented in more detail later and will
|
||||||
|
include a list of packages that need to be installed.
|
||||||
|
|
||||||
v3.9.1
|
v3.9.1
|
||||||
|
|
||||||
Fixed AVX2 version of anime algo.
|
Fixed AVX2 version of anime algo.
|
||||||
|
|||||||
@@ -11,6 +11,10 @@
|
|||||||
#include <sys/endian.h>
|
#include <sys/endian.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__CYGWIN__)
|
||||||
|
#include <endian.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "tmmintrin.h"
|
#include "tmmintrin.h"
|
||||||
#include "smmintrin.h"
|
#include "smmintrin.h"
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,10 @@
|
|||||||
#include <sys/endian.h>
|
#include <sys/endian.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__CYGWIN__)
|
||||||
|
#include <endian.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "tmmintrin.h"
|
#include "tmmintrin.h"
|
||||||
#include "smmintrin.h"
|
#include "smmintrin.h"
|
||||||
#include "immintrin.h"
|
#include "immintrin.h"
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ extern "C"{
|
|||||||
#pragma warning (disable: 4146)
|
#pragma warning (disable: 4146)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
static const sph_u64 RC[] = {
|
static const sph_u64 RC[] = {
|
||||||
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
|
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
|
||||||
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
|
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
|
||||||
@@ -106,7 +106,7 @@ static const sph_u64 RC[] = {
|
|||||||
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
|
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
|
||||||
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
|
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
|
||||||
};
|
};
|
||||||
|
*/
|
||||||
#define kekDECL_STATE \
|
#define kekDECL_STATE \
|
||||||
sph_u64 keca00, keca01, keca02, keca03, keca04; \
|
sph_u64 keca00, keca01, keca02, keca03, keca04; \
|
||||||
sph_u64 keca10, keca11, keca12, keca13, keca14; \
|
sph_u64 keca10, keca11, keca12, keca13, keca14; \
|
||||||
@@ -756,6 +756,20 @@ static const sph_u64 RC[] = {
|
|||||||
* tested faster saving space
|
* tested faster saving space
|
||||||
*/
|
*/
|
||||||
#define KECCAK_F_1600_ do { \
|
#define KECCAK_F_1600_ do { \
|
||||||
|
static const sph_u64 RC[] = { \
|
||||||
|
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082), \
|
||||||
|
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000), \
|
||||||
|
SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001), \
|
||||||
|
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009), \
|
||||||
|
SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088), \
|
||||||
|
SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A), \
|
||||||
|
SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B), \
|
||||||
|
SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003), \
|
||||||
|
SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080), \
|
||||||
|
SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A), \
|
||||||
|
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080), \
|
||||||
|
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008) \
|
||||||
|
}; \
|
||||||
int j; \
|
int j; \
|
||||||
for (j = 0; j < 24; j += 4) { \
|
for (j = 0; j < 24; j += 4) { \
|
||||||
KF_ELT( 0, 1, RC[j + 0]); \
|
KF_ELT( 0, 1, RC[j + 0]); \
|
||||||
@@ -791,7 +805,7 @@ static const sph_u64 RC[] = {
|
|||||||
/* load initial constants */
|
/* load initial constants */
|
||||||
#define KEC_I
|
#define KEC_I
|
||||||
|
|
||||||
static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 };
|
//static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 };
|
||||||
/*
|
/*
|
||||||
unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
|
unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
|
||||||
*/
|
*/
|
||||||
@@ -799,6 +813,7 @@ static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0
|
|||||||
/* load hash for loop */
|
/* load hash for loop */
|
||||||
#define KEC_U \
|
#define KEC_U \
|
||||||
do { \
|
do { \
|
||||||
|
static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
|
||||||
/*memcpy(hashbuf, hash, 64); */ \
|
/*memcpy(hashbuf, hash, 64); */ \
|
||||||
memcpy(hash + 64, keczword, 8); \
|
memcpy(hash + 64, keczword, 8); \
|
||||||
} while (0);
|
} while (0);
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ bool init_lyra2rev2_ctx();
|
|||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
|
|
||||||
#if defined(__SSE4_2__)
|
#if defined(__SSE2__)
|
||||||
#define LYRA2Z_4WAY
|
#define LYRA2Z_4WAY
|
||||||
#endif
|
#endif
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
|||||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||||
|
|
||||||
#elif defined(__SSE2__)
|
#elif defined(__SSE4_2__)
|
||||||
|
|
||||||
// process 2 columns in parallel
|
// process 2 columns in parallel
|
||||||
// returns void, all args updated
|
// returns void, all args updated
|
||||||
@@ -108,7 +108,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
|||||||
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||||
G_2X64( s0, s2, s4, s6 ); \
|
G_2X64( s0, s2, s4, s6 ); \
|
||||||
G_2X64( s1, s3, s5, s7 ); \
|
G_2X64( s1, s3, s5, s7 ); \
|
||||||
mm128_rol1x64_256( s2, s3 ); \
|
mm128_ror1x64_256( s2, s3 ); \
|
||||||
mm128_swap128_256( s4, s5 ); \
|
mm128_swap128_256( s4, s5 ); \
|
||||||
mm128_rol1x64_256( s6, s7 ); \
|
mm128_rol1x64_256( s6, s7 ); \
|
||||||
G_2X64( s0, s2, s4, s6 ); \
|
G_2X64( s0, s2, s4, s6 ); \
|
||||||
@@ -132,7 +132,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
|||||||
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||||
|
|
||||||
|
|
||||||
#endif // AVX2
|
#endif // AVX2 else SSE4_2
|
||||||
|
|
||||||
// Scalar
|
// Scalar
|
||||||
//Blake2b's G function
|
//Blake2b's G function
|
||||||
|
|||||||
@@ -30,7 +30,7 @@
|
|||||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(__SSE4_2__)
|
#if defined(__SSE2__)
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@@ -716,4 +716,4 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
|||||||
}
|
}
|
||||||
|
|
||||||
#endif // __AVX2__
|
#endif // __AVX2__
|
||||||
#endif // __SSE4_2__
|
#endif // __SSE2__
|
||||||
|
|||||||
@@ -44,7 +44,8 @@
|
|||||||
#include "sph_types.h"
|
#include "sph_types.h"
|
||||||
#include "avxdefs.h"
|
#include "avxdefs.h"
|
||||||
|
|
||||||
#if defined(__SSE4_2__)
|
#if defined(__SSE2__)
|
||||||
|
//#if defined(__SSE4_2__)
|
||||||
|
|
||||||
//#define SPH_SIZE_sha256 256
|
//#define SPH_SIZE_sha256 256
|
||||||
|
|
||||||
|
|||||||
@@ -108,7 +108,9 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(SHA256T_4WAY)
|
#endif
|
||||||
|
|
||||||
|
#if defined(SHA256T_4WAY)
|
||||||
|
|
||||||
static __thread sha256_4way_context sha256_ctx4 __attribute__ ((aligned (64)));
|
static __thread sha256_4way_context sha256_ctx4 __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
|
|||||||
@@ -3,15 +3,15 @@
|
|||||||
bool register_sha256t_algo( algo_gate_t* gate )
|
bool register_sha256t_algo( algo_gate_t* gate )
|
||||||
{
|
{
|
||||||
#if defined(SHA256T_8WAY)
|
#if defined(SHA256T_8WAY)
|
||||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
||||||
gate->hash = (void*)&sha256t_8way_hash;
|
gate->hash = (void*)&sha256t_8way_hash;
|
||||||
#elif defined(SHA256T_4WAY)
|
#elif defined(SHA256T_4WAY)
|
||||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_sha256t_4way;
|
gate->scanhash = (void*)&scanhash_sha256t_4way;
|
||||||
gate->hash = (void*)&sha256t_4way_hash;
|
gate->hash = (void*)&sha256t_4way_hash;
|
||||||
#else
|
#else
|
||||||
gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||||
gate->scanhash = (void*)&scanhash_sha256t;
|
gate->scanhash = (void*)&scanhash_sha256t;
|
||||||
gate->hash = (void*)&sha256t_hash;
|
gate->hash = (void*)&sha256t_hash;
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -6,7 +6,8 @@
|
|||||||
|
|
||||||
// Override multi way on ryzen, SHA is better.
|
// Override multi way on ryzen, SHA is better.
|
||||||
#if !defined(RYZEN_)
|
#if !defined(RYZEN_)
|
||||||
#if defined(__SSE4_2__)
|
//#if defined(__SSE4_2__)
|
||||||
|
#if defined(__SSE2__)
|
||||||
#define SHA256T_4WAY
|
#define SHA256T_4WAY
|
||||||
#endif
|
#endif
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
@@ -22,7 +23,7 @@ void sha256t_8way_hash( void *output, const void *input );
|
|||||||
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
|
|
||||||
#elif defined (SHA256T_4WAY)
|
#elif defined(SHA256T_4WAY)
|
||||||
|
|
||||||
void sha256t_4way_hash( void *output, const void *input );
|
void sha256t_4way_hash( void *output, const void *input );
|
||||||
int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||||
|
|||||||
@@ -5,8 +5,6 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <openssl/sha.h>
|
#include <openssl/sha.h>
|
||||||
|
|
||||||
#if !defined(SHA256T_4WAY)
|
|
||||||
|
|
||||||
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
|
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
|
||||||
|
|
||||||
void sha256t_midstate( const void* input )
|
void sha256t_midstate( const void* input )
|
||||||
@@ -100,4 +98,3 @@ int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|||||||
@@ -819,10 +819,7 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t *nonces = work->nonces;
|
|
||||||
int num_found = 0;
|
|
||||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||||
// uint32_t *noncep = vdata + 73; // 9*8 + 1
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||||
@@ -855,18 +852,23 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
nonces[ num_found++ ] = n + lane;
|
|
||||||
work_set_target_ratio( work, lane_hash );
|
work_set_target_ratio( work, lane_hash );
|
||||||
|
if ( submit_work( mythr, work ) )
|
||||||
|
applog( LOG_NOTICE,
|
||||||
|
"Share %d submitted by thread %d, lane %d.",
|
||||||
|
accepted_share_count + rejected_share_count + 1,
|
||||||
|
thr_id, lane );
|
||||||
|
else
|
||||||
|
applog( LOG_WARNING, "Failed to submit share." );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
|
||||||
&& !work_restart[thr_id].restart );
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -242,8 +242,6 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t n = pdata[19];
|
uint32_t n = pdata[19];
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t *nonces = work->nonces;
|
|
||||||
int num_found = 0;
|
|
||||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
@@ -277,18 +275,23 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
if ( fulltest( lane_hash, ptarget ) )
|
if ( fulltest( lane_hash, ptarget ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
pdata[19] = n + lane;
|
||||||
nonces[ num_found++ ] = n + lane;
|
|
||||||
work_set_target_ratio( work, lane_hash );
|
work_set_target_ratio( work, lane_hash );
|
||||||
|
if ( submit_work( mythr, work ) )
|
||||||
|
applog( LOG_NOTICE,
|
||||||
|
"Share %d submitted by thread %d, lane %d.",
|
||||||
|
accepted_share_count + rejected_share_count + 1,
|
||||||
|
thr_id, lane );
|
||||||
|
else
|
||||||
|
applog( LOG_WARNING, "Failed to submit share." );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
|
||||||
&& !work_restart[thr_id].restart );
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
*hashes_done = n - first_nonce + 1;
|
*hashes_done = n - first_nonce + 1;
|
||||||
return num_found;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -528,7 +528,7 @@ static volatile uint64_t Smask2var = Smask2;
|
|||||||
/* 64-bit without AVX. This relies on out-of-order execution and register
|
/* 64-bit without AVX. This relies on out-of-order execution and register
|
||||||
* renaming. It may actually be fastest on CPUs with AVX(2) as well - e.g.,
|
* renaming. It may actually be fastest on CPUs with AVX(2) as well - e.g.,
|
||||||
* it runs great on Haswell. */
|
* it runs great on Haswell. */
|
||||||
#warning "Note: using x86-64 inline assembly for pwxform. That's great."
|
//#warning "Note: using x86-64 inline assembly for pwxform. That's great."
|
||||||
#undef MAYBE_MEMORY_BARRIER
|
#undef MAYBE_MEMORY_BARRIER
|
||||||
#define MAYBE_MEMORY_BARRIER \
|
#define MAYBE_MEMORY_BARRIER \
|
||||||
__asm__("" : : : "memory");
|
__asm__("" : : : "memory");
|
||||||
|
|||||||
21
avxdefs.h
21
avxdefs.h
@@ -173,6 +173,7 @@ typedef union _m64_v16 m64_v16;
|
|||||||
// Unary negate elements
|
// Unary negate elements
|
||||||
#define mm64_negate_32( v ) _mm_sub_pi32( m64_zero, (__m64)v )
|
#define mm64_negate_32( v ) _mm_sub_pi32( m64_zero, (__m64)v )
|
||||||
#define mm64_negate_16( v ) _mm_sub_pi16( m64_zero, (__m64)v )
|
#define mm64_negate_16( v ) _mm_sub_pi16( m64_zero, (__m64)v )
|
||||||
|
#define mm64_negate_8( v ) _mm_sub_pi8( m64_zero, (__m64)v )
|
||||||
|
|
||||||
// Rotate bits in packed elements of 64 bit vector
|
// Rotate bits in packed elements of 64 bit vector
|
||||||
#define mm64_rol_32( a, n ) \
|
#define mm64_rol_32( a, n ) \
|
||||||
@@ -206,15 +207,32 @@ typedef union _m64_v16 m64_v16;
|
|||||||
#if defined(__SSSE3__)
|
#if defined(__SSSE3__)
|
||||||
|
|
||||||
// Endian byte swap packed elements
|
// Endian byte swap packed elements
|
||||||
|
// A vectorized version of the u64 bswap, use when data already in MMX reg.
|
||||||
|
#define mm64_bswap_64( v ) \
|
||||||
|
_mm_shuffle_pi8( (__m64)v, _mm_set_pi8( 0,1,2,3,4,5,6,7 ) )
|
||||||
|
|
||||||
#define mm64_bswap_32( v ) \
|
#define mm64_bswap_32( v ) \
|
||||||
_mm_shuffle_pi8( (__m64)v, _mm_set_pi8( 4,5,6,7, 0,1,2,3 ) )
|
_mm_shuffle_pi8( (__m64)v, _mm_set_pi8( 4,5,6,7, 0,1,2,3 ) )
|
||||||
|
|
||||||
#define mm64_bswap_16( v ) \
|
#define mm64_bswap_16( v ) \
|
||||||
_mm_shuffle_pi8( (__m64)v, _mm_set_pi8( 6,7, 4,5, 2,3, 0,1 ) );
|
_mm_shuffle_pi8( (__m64)v, _mm_set_pi8( 6,7, 4,5, 2,3, 0,1 ) );
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define mm64_bswap_64( v ) \
|
||||||
|
(__m64)__builtin_bswap64( (uint64_t)v )
|
||||||
|
|
||||||
|
// Looks clumsy but hopefully it works.
|
||||||
|
#define mm64_bswap_32( v ) \
|
||||||
|
_mm_set_pi32( __builtin_bswap32( ((uint32_t*)v)[1] ), \
|
||||||
|
__builtin_bswap32( ((uint32_t*)v)[0] ) )
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Invert vector: {3,2,1,0} -> {0,1,2,3}
|
// Invert vector: {3,2,1,0} -> {0,1,2,3}
|
||||||
|
// Invert_64 is the same as bswap64
|
||||||
|
// Invert_32 is the same as swap32
|
||||||
|
|
||||||
#define mm64_invert_16( v ) _mm_shuffle_pi16( (__m64)v, 0x1b )
|
#define mm64_invert_16( v ) _mm_shuffle_pi16( (__m64)v, 0x1b )
|
||||||
|
|
||||||
#if defined(__SSSE3__)
|
#if defined(__SSSE3__)
|
||||||
@@ -1899,7 +1917,7 @@ do { \
|
|||||||
|
|
||||||
#endif // AVX512F
|
#endif // AVX512F
|
||||||
|
|
||||||
#if 0
|
#if 1
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
// Compile test.
|
// Compile test.
|
||||||
@@ -1919,6 +1937,7 @@ static inline __m64 mmx_compile_test( __m64 a )
|
|||||||
m = _mm_shuffle_pi8( m, (__m64)0x0102030405060708 );
|
m = _mm_shuffle_pi8( m, (__m64)0x0102030405060708 );
|
||||||
i = (uint64_t) mm64_ror_32( (__m64)i, 7 );
|
i = (uint64_t) mm64_ror_32( (__m64)i, 7 );
|
||||||
casti_m64( n, 2 ) = m;
|
casti_m64( n, 2 ) = m;
|
||||||
|
m = (__m64)__builtin_bswap64( (uint64_t)m );
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
22
configure
vendored
22
configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.1.
|
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.9.1.1.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='cpuminer-opt'
|
PACKAGE_NAME='cpuminer-opt'
|
||||||
PACKAGE_TARNAME='cpuminer-opt'
|
PACKAGE_TARNAME='cpuminer-opt'
|
||||||
PACKAGE_VERSION='3.9.1'
|
PACKAGE_VERSION='3.9.1.1'
|
||||||
PACKAGE_STRING='cpuminer-opt 3.9.1'
|
PACKAGE_STRING='cpuminer-opt 3.9.1.1'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures cpuminer-opt 3.9.1 to adapt to many kinds of systems.
|
\`configure' configures cpuminer-opt 3.9.1.1 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1404,7 +1404,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of cpuminer-opt 3.9.1:";;
|
short | recursive ) echo "Configuration of cpuminer-opt 3.9.1.1:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1509,7 +1509,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
cpuminer-opt configure 3.9.1
|
cpuminer-opt configure 3.9.1.1
|
||||||
generated by GNU Autoconf 2.69
|
generated by GNU Autoconf 2.69
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by cpuminer-opt $as_me 3.9.1, which was
|
It was created by cpuminer-opt $as_me 3.9.1.1, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
@@ -2993,7 +2993,7 @@ fi
|
|||||||
|
|
||||||
# Define the identity of the package.
|
# Define the identity of the package.
|
||||||
PACKAGE='cpuminer-opt'
|
PACKAGE='cpuminer-opt'
|
||||||
VERSION='3.9.1'
|
VERSION='3.9.1.1'
|
||||||
|
|
||||||
|
|
||||||
cat >>confdefs.h <<_ACEOF
|
cat >>confdefs.h <<_ACEOF
|
||||||
@@ -5884,7 +5884,7 @@ fi
|
|||||||
|
|
||||||
|
|
||||||
# GC2 for GNU static
|
# GC2 for GNU static
|
||||||
if test "x$OS" = "xWindows_NT" ; then
|
if test "x$have_win32" = "xtrue" ; then
|
||||||
# MinGW
|
# MinGW
|
||||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5
|
||||||
$as_echo_n "checking for pthread_create in -lpthread... " >&6; }
|
$as_echo_n "checking for pthread_create in -lpthread... " >&6; }
|
||||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by cpuminer-opt $as_me 3.9.1, which was
|
This file was extended by cpuminer-opt $as_me 3.9.1.1, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
cpuminer-opt config.status 3.9.1
|
cpuminer-opt config.status 3.9.1.1
|
||||||
configured by $0, generated by GNU Autoconf 2.69,
|
configured by $0, generated by GNU Autoconf 2.69,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([cpuminer-opt], [3.9.1])
|
AC_INIT([cpuminer-opt], [3.9.1.1])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
@@ -106,7 +106,7 @@ fi
|
|||||||
AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
|
AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
|
||||||
|
|
||||||
# GC2 for GNU static
|
# GC2 for GNU static
|
||||||
if test "x$OS" = "xWindows_NT" ; then
|
if test "x$have_win32" = "xtrue" ; then
|
||||||
# MinGW
|
# MinGW
|
||||||
AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS="-lpthreadGC2",[])
|
AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS="-lpthreadGC2",[])
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ export CONFIGURE_ARGS="--with-curl=$LOCAL_LIB/curl --with-crypto=$LOCAL_LIB/open
|
|||||||
ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
|
ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
|
||||||
|
|
||||||
# edit configure to fix pthread lib name for Windows.
|
# edit configure to fix pthread lib name for Windows.
|
||||||
sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
|
#sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
|
||||||
|
|
||||||
# make release directory and copy selected DLLs.
|
# make release directory and copy selected DLLs.
|
||||||
mkdir release
|
mkdir release
|
||||||
|
|||||||
Reference in New Issue
Block a user