mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.9.1.1
This commit is contained in:
@@ -11,6 +11,10 @@
|
||||
#include <sys/endian.h>
|
||||
#endif
|
||||
|
||||
#if defined(__CYGWIN__)
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
#include "tmmintrin.h"
|
||||
#include "smmintrin.h"
|
||||
|
||||
|
||||
@@ -8,6 +8,10 @@
|
||||
#include <sys/endian.h>
|
||||
#endif
|
||||
|
||||
#if defined(__CYGWIN__)
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
#include "tmmintrin.h"
|
||||
#include "smmintrin.h"
|
||||
#include "immintrin.h"
|
||||
|
||||
@@ -91,7 +91,7 @@ extern "C"{
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
static const sph_u64 RC[] = {
|
||||
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
|
||||
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
|
||||
@@ -106,7 +106,7 @@ static const sph_u64 RC[] = {
|
||||
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
|
||||
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
|
||||
};
|
||||
|
||||
*/
|
||||
#define kekDECL_STATE \
|
||||
sph_u64 keca00, keca01, keca02, keca03, keca04; \
|
||||
sph_u64 keca10, keca11, keca12, keca13, keca14; \
|
||||
@@ -756,6 +756,20 @@ static const sph_u64 RC[] = {
|
||||
* tested faster saving space
|
||||
*/
|
||||
#define KECCAK_F_1600_ do { \
|
||||
static const sph_u64 RC[] = { \
|
||||
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082), \
|
||||
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000), \
|
||||
SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001), \
|
||||
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009), \
|
||||
SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088), \
|
||||
SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A), \
|
||||
SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B), \
|
||||
SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003), \
|
||||
SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080), \
|
||||
SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A), \
|
||||
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080), \
|
||||
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008) \
|
||||
}; \
|
||||
int j; \
|
||||
for (j = 0; j < 24; j += 4) { \
|
||||
KF_ELT( 0, 1, RC[j + 0]); \
|
||||
@@ -791,7 +805,7 @@ static const sph_u64 RC[] = {
|
||||
/* load initial constants */
|
||||
#define KEC_I
|
||||
|
||||
static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 };
|
||||
//static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 };
|
||||
/*
|
||||
unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
|
||||
*/
|
||||
@@ -799,6 +813,7 @@ static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0
|
||||
/* load hash for loop */
|
||||
#define KEC_U \
|
||||
do { \
|
||||
static unsigned char keczword[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }; \
|
||||
/*memcpy(hashbuf, hash, 64); */ \
|
||||
memcpy(hash + 64, keczword, 8); \
|
||||
} while (0);
|
||||
|
||||
@@ -57,7 +57,7 @@ bool init_lyra2rev2_ctx();
|
||||
|
||||
/////////////////////////
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
#if defined(__SSE2__)
|
||||
#define LYRA2Z_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
|
||||
@@ -91,7 +91,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
|
||||
|
||||
#elif defined(__SSE2__)
|
||||
#elif defined(__SSE4_2__)
|
||||
|
||||
// process 2 columns in parallel
|
||||
// returns void, all args updated
|
||||
@@ -108,7 +108,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
G_2X64( s0, s2, s4, s6 ); \
|
||||
G_2X64( s1, s3, s5, s7 ); \
|
||||
mm128_rol1x64_256( s2, s3 ); \
|
||||
mm128_ror1x64_256( s2, s3 ); \
|
||||
mm128_swap128_256( s4, s5 ); \
|
||||
mm128_rol1x64_256( s6, s7 ); \
|
||||
G_2X64( s0, s2, s4, s6 ); \
|
||||
@@ -132,7 +132,7 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
|
||||
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
|
||||
|
||||
|
||||
#endif // AVX2
|
||||
#endif // AVX2 else SSE4_2
|
||||
|
||||
// Scalar
|
||||
//Blake2b's G function
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
#if defined(__SSE2__)
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
@@ -716,4 +716,4 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
}
|
||||
|
||||
#endif // __AVX2__
|
||||
#endif // __SSE4_2__
|
||||
#endif // __SSE2__
|
||||
|
||||
@@ -44,7 +44,8 @@
|
||||
#include "sph_types.h"
|
||||
#include "avxdefs.h"
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
#if defined(__SSE2__)
|
||||
//#if defined(__SSE4_2__)
|
||||
|
||||
//#define SPH_SIZE_sha256 256
|
||||
|
||||
|
||||
@@ -108,7 +108,9 @@ int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined(SHA256T_4WAY)
|
||||
#endif
|
||||
|
||||
#if defined(SHA256T_4WAY)
|
||||
|
||||
static __thread sha256_4way_context sha256_ctx4 __attribute__ ((aligned (64)));
|
||||
|
||||
|
||||
@@ -3,15 +3,15 @@
|
||||
bool register_sha256t_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(SHA256T_8WAY)
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t_8way;
|
||||
gate->hash = (void*)&sha256t_8way_hash;
|
||||
#elif defined(SHA256T_4WAY)
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t_4way;
|
||||
gate->hash = (void*)&sha256t_4way_hash;
|
||||
#else
|
||||
gate->optimizations = SSE42_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->optimizations = SSE2_OPT | AVX2_OPT | SHA_OPT;
|
||||
gate->scanhash = (void*)&scanhash_sha256t;
|
||||
gate->hash = (void*)&sha256t_hash;
|
||||
#endif
|
||||
|
||||
@@ -6,7 +6,8 @@
|
||||
|
||||
// Override multi way on ryzen, SHA is better.
|
||||
#if !defined(RYZEN_)
|
||||
#if defined(__SSE4_2__)
|
||||
//#if defined(__SSE4_2__)
|
||||
#if defined(__SSE2__)
|
||||
#define SHA256T_4WAY
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
@@ -22,7 +23,7 @@ void sha256t_8way_hash( void *output, const void *input );
|
||||
int scanhash_sha256t_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined (SHA256T_4WAY)
|
||||
#elif defined(SHA256T_4WAY)
|
||||
|
||||
void sha256t_4way_hash( void *output, const void *input );
|
||||
int scanhash_sha256t_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
@@ -5,8 +5,6 @@
|
||||
#include <stdio.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#if !defined(SHA256T_4WAY)
|
||||
|
||||
static __thread SHA256_CTX sha256t_ctx __attribute__ ((aligned (64)));
|
||||
|
||||
void sha256t_midstate( const void* input )
|
||||
@@ -100,4 +98,3 @@ int scanhash_sha256t( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -819,10 +819,7 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
// uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
@@ -855,18 +852,23 @@ int scanhash_sonoa_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
nonces[ num_found++ ] = n + lane;
|
||||
work_set_target_ratio( work, lane_hash );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE,
|
||||
"Share %d submitted by thread %d, lane %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id, lane );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -242,8 +242,6 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
@@ -277,18 +275,23 @@ int scanhash_x17_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
nonces[ num_found++ ] = n + lane;
|
||||
work_set_target_ratio( work, lane_hash );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE,
|
||||
"Share %d submitted by thread %d, lane %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id, lane );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( ( n < max_nonce - 4 ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -528,7 +528,7 @@ static volatile uint64_t Smask2var = Smask2;
|
||||
/* 64-bit without AVX. This relies on out-of-order execution and register
|
||||
* renaming. It may actually be fastest on CPUs with AVX(2) as well - e.g.,
|
||||
* it runs great on Haswell. */
|
||||
#warning "Note: using x86-64 inline assembly for pwxform. That's great."
|
||||
//#warning "Note: using x86-64 inline assembly for pwxform. That's great."
|
||||
#undef MAYBE_MEMORY_BARRIER
|
||||
#define MAYBE_MEMORY_BARRIER \
|
||||
__asm__("" : : : "memory");
|
||||
|
||||
Reference in New Issue
Block a user